aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/isa-build.yml22
-rw-r--r--.github/workflows/merge-and-release.yml88
-rw-r--r--.gitignore3
-rw-r--r--README.md6
-rw-r--r--build/.gitignore2
-rw-r--r--build/Makefile37
-rw-r--r--dependencies/Gemfile1
-rw-r--r--marchid.md10
-rw-r--r--src/a-st-ext.adoc23
-rw-r--r--src/c-st-ext.adoc33
-rw-r--r--src/calling-convention.adoc29
-rw-r--r--src/cmo.adoc1130
-rw-r--r--src/colophon.adoc27
-rw-r--r--src/counters.adoc4
-rw-r--r--src/d-st-ext.adoc6
-rw-r--r--src/example/memcpy.s17
-rw-r--r--src/example/saxpy.s29
-rw-r--r--src/example/sgemm.S221
-rw-r--r--src/example/strcmp.s34
-rw-r--r--src/example/strcpy.s20
-rw-r--r--src/example/strlen.s22
-rw-r--r--src/example/strncpy.s36
-rw-r--r--src/example/vvaddint32.s22
-rw-r--r--src/extending.adoc2
-rw-r--r--src/f-st-ext.adoc12
-rw-r--r--src/fraclmul.adoc174
-rw-r--r--src/hypervisor.adoc161
-rw-r--r--src/images/bytefield/hedelegreg.edn6
-rw-r--r--src/images/bytefield/henvcfg.edn23
-rw-r--r--src/images/bytefield/hstatusreg-rv32.edn4
-rw-r--r--src/images/bytefield/hstatusreg.edn4
-rw-r--r--src/images/bytefield/hypv-mstatus.edn6
-rw-r--r--src/images/bytefield/medeleg.adoc6
-rw-r--r--src/images/bytefield/menvcfgreg.adoc16
-rw-r--r--src/images/bytefield/miereg-standard.adoc12
-rw-r--r--src/images/bytefield/mipreg-standard.adoc12
-rw-r--r--src/images/bytefield/mncause.edn6
-rw-r--r--src/images/bytefield/mnstatus.edn20
-rw-r--r--src/images/bytefield/mseccfg.adoc10
-rw-r--r--src/images/bytefield/priv-instr-set.edn8
-rw-r--r--src/images/bytefield/rv32satp.edn2
-rw-r--r--src/images/bytefield/rvc-instr-quad1.adoc4
-rw-r--r--src/images/bytefield/senvcfg.edn18
-rw-r--r--src/images/bytefield/siereg-standard.edn17
-rw-r--r--src/images/bytefield/sipreg-standard.edn17
-rw-r--r--src/images/bytefield/vsepcreg.edn2
-rw-r--r--src/images/bytefield/vsscratchreg.edn2
-rw-r--r--src/images/bytefield/vsstatusreg.edn6
-rw-r--r--src/images/riscv-horizontal-color.svg36
-rw-r--r--src/images/smepmp-visual-representation.pngbin0 -> 89113 bytes
-rw-r--r--src/images/wavedrom/c-ci.adoc2
-rw-r--r--src/images/wavedrom/c-ciw.adoc2
-rw-r--r--src/images/wavedrom/c-cs-format-ls.adoc2
-rw-r--r--src/images/wavedrom/c-int-reg-immed.adoc6
-rw-r--r--src/images/wavedrom/c-integer-const-gen.adoc4
-rw-r--r--src/images/wavedrom/c-mop.adoc12
-rw-r--r--src/images/wavedrom/c-sp-load-store-css.adoc4
-rw-r--r--src/images/wavedrom/c-srli-srai.adoc4
-rw-r--r--src/images/wavedrom/csr-instr.adoc2
-rw-r--r--src/images/wavedrom/ct-conditional.adoc5
-rw-r--r--src/images/wavedrom/ct-unconditional-2.adoc2
-rw-r--r--src/images/wavedrom/division-op.adoc2
-rw-r--r--src/images/wavedrom/load-reserve-st-conditional.adoc2
-rw-r--r--src/images/wavedrom/mop-r.adoc15
-rw-r--r--src/images/wavedrom/mop-rr.adoc15
-rw-r--r--src/images/wavedrom/reg-based-ldnstr.adoc2
-rw-r--r--src/images/wavedrom/sp-load-store-2.adoc24
-rw-r--r--src/images/wavedrom/v-inst-table.adoc210
-rw-r--r--src/images/wavedrom/valu-format.adoc104
-rw-r--r--src/images/wavedrom/vcfg-format.adoc47
-rw-r--r--src/images/wavedrom/vfrec7.adoc136
-rw-r--r--src/images/wavedrom/vfrsqrt7.adoc137
-rw-r--r--src/images/wavedrom/vmem-format.adoc108
-rw-r--r--src/images/wavedrom/vtype-format.adoc28
-rw-r--r--src/intro.adoc22
-rw-r--r--src/latex/figs/PLIC-block-diagram.pdfbin13826 -> 0 bytes
-rw-r--r--src/latex/figs/PLIC-interrupt-flow.pdfbin15941 -> 0 bytes
-rw-r--r--src/latex/figs/halimps.pdfbin10187 -> 0 bytes
-rw-r--r--src/latex/figs/halmode.pdfbin11179 -> 0 bytes
-rw-r--r--src/latex/figs/litmus_addrpo.pdfbin4481 -> 0 bytes
-rw-r--r--src/latex/figs/litmus_addrpo.pdf_t49
-rw-r--r--src/latex/figs/litmus_datacoirfi.pdfbin4081 -> 0 bytes
-rw-r--r--src/latex/figs/litmus_datacoirfi.pdf_t61
-rw-r--r--src/latex/figs/litmus_datarfi.pdfbin4668 -> 0 bytes
-rw-r--r--src/latex/figs/litmus_datarfi.pdf_t56
-rw-r--r--src/latex/figs/litmus_lb_lrsc.pdfbin4988 -> 0 bytes
-rw-r--r--src/latex/figs/litmus_lb_lrsc.pdf_t50
-rw-r--r--src/latex/figs/litmus_mp_fenceww_fri_rfi_addr.pdfbin4081 -> 0 bytes
-rw-r--r--src/latex/figs/litmus_mp_fenceww_fri_rfi_addr.pdf_t56
-rw-r--r--src/latex/figs/litmus_ppoca.pdfbin5280 -> 0 bytes
-rw-r--r--src/latex/figs/litmus_ppoca.pdf_t58
-rw-r--r--src/latex/figs/litmus_rsw.pdfbin4308 -> 0 bytes
-rw-r--r--src/latex/figs/litmus_rsw.pdf_t61
-rw-r--r--src/latex/figs/litmus_sample.pdfbin5320 -> 0 bytes
-rw-r--r--src/latex/figs/litmus_sample.pdf_t52
-rw-r--r--src/latex/figs/litmus_sb_fwd.pdfbin3886 -> 0 bytes
-rw-r--r--src/latex/figs/litmus_sb_fwd.pdf_t52
-rw-r--r--src/latex/figs/litmus_subsumption.pdfbin3620 -> 0 bytes
-rw-r--r--src/latex/figs/litmus_subsumption.pdf_t49
-rw-r--r--src/latex/figs/privimps.pdfbin9537 -> 0 bytes
-rw-r--r--src/latex/figs/virtimps.pdfbin11644 -> 0 bytes
-rw-r--r--src/latex/hypervisor.tex3590
-rw-r--r--src/latex/machine.tex3823
-rw-r--r--src/latex/preamble.tex148
-rw-r--r--src/latex/priv-csrs.tex554
-rw-r--r--src/latex/priv-history.tex29
-rw-r--r--src/latex/priv-insns.tex9
-rw-r--r--src/latex/priv-instr-table.tex344
-rw-r--r--src/latex/priv-intro.tex210
-rw-r--r--src/latex/priv-preface.tex276
-rw-r--r--src/latex/riscv-privileged.tex89
-rw-r--r--src/latex/riscv-spec.bib513
-rw-r--r--src/latex/rnmi.tex234
-rw-r--r--src/latex/supervisor.tex2705
-rw-r--r--src/m-st-ext.adoc9
-rw-r--r--src/machine.adoc375
-rw-r--r--src/mm-alloy.adoc2
-rw-r--r--src/mm-eplan.adoc2
-rw-r--r--src/mm-formal.adoc70
-rw-r--r--src/naming.adoc56
-rw-r--r--src/priv-csrs.adoc280
-rw-r--r--src/priv-preface.adoc49
-rw-r--r--src/resources/riscv-spec.bib2
-rw-r--r--src/resources/themes/riscv-spec.yml65
-rw-r--r--src/riscv-privileged.adoc19
-rw-r--r--src/riscv-unprivileged.adoc54
-rw-r--r--src/rnmi.adoc28
-rw-r--r--src/rv-32-64g.adoc13
-rw-r--r--src/rv32.adoc20
-rw-r--r--src/rv32e.adoc2
-rw-r--r--src/rv64.adoc2
-rw-r--r--src/rvwmo.adoc23
-rw-r--r--src/smepmp.adoc171
-rw-r--r--src/smstateen.adoc406
-rw-r--r--src/sscofpmt.adoc189
-rw-r--r--src/sstc.adoc190
-rw-r--r--src/supervisor.adoc251
-rw-r--r--src/v-st-ext.adoc5185
-rw-r--r--src/vector-examples.adoc125
-rw-r--r--src/zam-st-ext.adoc55
-rw-r--r--src/zawrs.adoc105
-rw-r--r--src/zc.adoc2611
-rw-r--r--src/zfa.adoc10
-rw-r--r--src/zfh.adoc6
-rw-r--r--src/zfinx.adoc2
-rw-r--r--src/zicsr.adoc39
-rw-r--r--src/zimop.adoc122
147 files changed, 12930 insertions, 13981 deletions
diff --git a/.github/workflows/isa-build.yml b/.github/workflows/isa-build.yml
index 7135c26..ca1b4c5 100644
--- a/.github/workflows/isa-build.yml
+++ b/.github/workflows/isa-build.yml
@@ -28,7 +28,7 @@ jobs:
steps:
# Checkout the repository
- name: Checkout repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
# Set the short SHA for use in artifact names
- name: Set short SHA
@@ -57,7 +57,7 @@ jobs:
# Upload the priv-isa-asciidoc PDF file
- name: Upload priv-isa-asciidoc.pdf
if: steps.build_files.outcome == 'success'
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
with:
name: priv-isa-asciidoc-${{ env.SHORT_SHA }}.pdf
path: ${{ github.workspace }}/build/priv-isa-asciidoc.pdf
@@ -66,7 +66,7 @@ jobs:
# Upload the priv-isa-asciidoc HTML file
- name: Upload priv-isa-asciidoc.html
if: steps.build_files.outcome == 'success'
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
with:
name: priv-isa-asciidoc-${{ env.SHORT_SHA }}.html
path: ${{ github.workspace }}/build/priv-isa-asciidoc.html
@@ -75,7 +75,7 @@ jobs:
# Upload the unpriv-isa-asciidoc PDF file
- name: Upload unpriv-isa-asciidoc.pdf
if: steps.build_files.outcome == 'success'
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
with:
name: unpriv-isa-asciidoc-${{ env.SHORT_SHA }}.pdf
path: ${{ github.workspace }}/build/unpriv-isa-asciidoc.pdf
@@ -84,24 +84,15 @@ jobs:
# Upload the unpriv-isa-asciidoc HTML file
- name: Upload unpriv-isa-asciidoc.html
if: steps.build_files.outcome == 'success'
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
with:
name: unpriv-isa-asciidoc-${{ env.SHORT_SHA }}.html
path: ${{ github.workspace }}/build/unpriv-isa-asciidoc.html
retention-days: 7
- # Upload the priv-isa-latex PDF file
- - name: Upload riscv-privileged.pdf
- if: steps.build_files.outcome == 'success'
- uses: actions/upload-artifact@v3
- with:
- name: riscv-privileged-latex-${{ env.SHORT_SHA }}.pdf
- path: ${{ github.workspace }}/build/riscv-privileged.pdf
- retention-days: 7
-
- name: Create Release
if: steps.build_files.outcome == 'success' && github.event_name == 'workflow_dispatch' && github.event.inputs.create_release == 'true'
- uses: softprops/action-gh-release@v1
+ uses: softprops/action-gh-release@v2
with:
draft: false
tag_name: riscv-isa-release-${{ env.SHORT_SHA }}-${{ env.CURRENT_DATE }}
@@ -114,7 +105,6 @@ jobs:
${{ github.workspace }}/build/priv-isa-asciidoc.html
${{ github.workspace }}/build/unpriv-isa-asciidoc.pdf
${{ github.workspace }}/build/unpriv-isa-asciidoc.html
- ${{ github.workspace }}/build/riscv-privileged.pdf
env:
GITHUB_TOKEN: ${{ secrets.GHTOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/merge-and-release.yml b/.github/workflows/merge-and-release.yml
new file mode 100644
index 0000000..88390e0
--- /dev/null
+++ b/.github/workflows/merge-and-release.yml
@@ -0,0 +1,88 @@
+name: Release New ISA When Merging a PR
+
+on:
+ pull_request:
+ branches:
+ - main
+ types:
+ - closed
+
+jobs:
+ if_merged:
+ if: github.event.pull_request.merged == true
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - run: |
+ echo The PR was successfully merged.
+
+ - name: Set short SHA
+ run: echo "SHORT_SHA=$(echo ${GITHUB_SHA::7})" >> $GITHUB_ENV
+
+ - name: Get current date
+ run: echo "CURRENT_DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
+
+ - name: Pull Container
+ id: pull_container_image
+ run: |
+ docker pull riscvintl/riscv-docs-base-container-image:latest
+
+ - name: Build Files
+ id: build_files
+ if: steps.pull_container_image.outcome == 'success'
+ run: |
+ docker run --rm -v ${{ github.workspace }}:/build riscvintl/riscv-docs-base-container-image:latest \
+ /bin/sh -c 'cd ./build && make'
+
+ # Upload the priv-isa-asciidoc PDF file
+ - name: Upload priv-isa-asciidoc.pdf
+ if: steps.build_files.outcome == 'success'
+ uses: actions/upload-artifact@v4
+ with:
+ name: priv-isa-asciidoc-${{ env.SHORT_SHA }}.pdf
+ path: ${{ github.workspace }}/build/priv-isa-asciidoc.pdf
+
+ # Upload the priv-isa-asciidoc HTML file
+ - name: Upload priv-isa-asciidoc.html
+ if: steps.build_files.outcome == 'success'
+ uses: actions/upload-artifact@v4
+ with:
+ name: priv-isa-asciidoc-${{ env.SHORT_SHA }}.html
+ path: ${{ github.workspace }}/build/priv-isa-asciidoc.html
+
+ # Upload the unpriv-isa-asciidoc PDF file
+ - name: Upload unpriv-isa-asciidoc.pdf
+ if: steps.build_files.outcome == 'success'
+ uses: actions/upload-artifact@v4
+ with:
+ name: unpriv-isa-asciidoc-${{ env.SHORT_SHA }}.pdf
+ path: ${{ github.workspace }}/build/unpriv-isa-asciidoc.pdf
+
+ # Upload the unpriv-isa-asciidoc HTML file
+ - name: Upload unpriv-isa-asciidoc.html
+ if: steps.build_files.outcome == 'success'
+ uses: actions/upload-artifact@v4
+ with:
+ name: unpriv-isa-asciidoc-${{ env.SHORT_SHA }}.html
+ path: ${{ github.workspace }}/build/unpriv-isa-asciidoc.html
+
+ - name: Create Release
+ uses: softprops/action-gh-release@v2
+ env:
+ GITHUB_TOKEN: ${{ secrets.GHTOKEN }}
+ with:
+ tag_name: riscv-isa-release-${{ env.SHORT_SHA }}-${{ env.CURRENT_DATE }}
+ name: Release riscv-isa-release-${{ env.SHORT_SHA }}-${{ env.CURRENT_DATE }}
+ draft: false
+ prerelease: false
+ make_latest: true
+ generate_release_notes: true
+ body: |
+ This release was created by: ${{ github.event.sender.login }}
+ Release of RISC-V ISA, built from commit ${{ env.SHORT_SHA }}, is now available.
+ files: |
+ ${{ github.workspace }}/build/priv-isa-asciidoc.pdf
+ ${{ github.workspace }}/build/priv-isa-asciidoc.html
+ ${{ github.workspace }}/build/unpriv-isa-asciidoc.pdf
+ ${{ github.workspace }}/build/unpriv-isa-asciidoc.html
diff --git a/.gitignore b/.gitignore
index e61db2e..0253b91 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
.DS_Store
.*.swp
+.vscode
+src/.asciidoctor
+src/diag*
diff --git a/README.md b/README.md
index 51e756f..9ef0639 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
[![RISC-V ISA Build](https://github.com/riscv/riscv-isa-manual/actions/workflows/isa-build.yml/badge.svg)](https://github.com/riscv/riscv-isa-manual/actions/workflows/isa-build.yml)
-This repository contains the source files for the RISC-V Instruction Set Manual, which is comprised of the Privileged RISC-V Instruction Set Manual (LaTeX) and the Unprivileged RISC-V Instruction Set Manual (AsciiDoc). The preface of each document indicates the version of each standard that has been formally ratified by RISC-V International.
+This repository contains the source files for the RISC-V Instruction Set Manual, which consists of the Privileged RISC-V Instruction Set Manual (AsciiDoc) and the Unprivileged RISC-V Instruction Set Manual (AsciiDoc). The preface of each document indicates the version of each standard that has been formally ratified by RISC-V International.
This work is licensed under a [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/). See the [LICENSE](LICENSE) file for details.
@@ -26,3 +26,7 @@ If you would like to contribute to this documentation, please refer to the [Docu
The recommended method for building the PDF files is to use the Docker Image, as described in the [RISC-V Docs Base Container Image repository](https://github.com/riscv/riscv-docs-base-container-image).
Alternative build methods, such as local builds and GitHub Action builds, are also available and described in the Documentation Developer's Guide.
+
+## Repo Activity
+
+![Repobeats analytics image](https://repobeats.axiom.co/api/embed/ccec87dc4502f2ed7c216b670b5ed8efc33a1d4c.svg "Repobeats analytics image")
diff --git a/build/.gitignore b/build/.gitignore
index 3b37eba..3c56997 100644
--- a/build/.gitignore
+++ b/build/.gitignore
@@ -1,9 +1,11 @@
*.aux
*.bbl
*.blg
+*.html
*.log
*.out
*.pdf
+*.pdf.tmp
*.toc
images
.asciidoctor
diff --git a/build/Makefile b/build/Makefile
index 9b72b01..fad4fbc 100644
--- a/build/Makefile
+++ b/build/Makefile
@@ -14,7 +14,7 @@
# for generating documentation in various formats (PDF, HTML).
# Build Targets
-TARGETS := priv unpriv priv-html unpriv-html priv-latex
+TARGETS := priv unpriv priv-html unpriv-html
# Declare phony targets
.PHONY: all $(TARGETS) clean
@@ -22,8 +22,14 @@ TARGETS := priv unpriv priv-html unpriv-html priv-latex
# Default target builds all
all: $(TARGETS)
+# Build with preinstalled docker container; first install it with:
+# docker pull riscvintl/riscv-docs-base-container-image:latest
+docker:
+ cd .. && docker run -it -v .:/build riscvintl/riscv-docs-base-container-image:latest /bin/sh -c 'cd ./build; make $(MAKEFLAGS)'
+
# Asciidoctor options
-ASCIIDOCTOR_OPTS := --attribute=mathematical-format=svg \
+ASCIIDOCTOR_OPTS := -a compress \
+ --attribute=mathematical-format=svg \
--failure-level=ERROR \
--require=asciidoctor-bibtex \
--require=asciidoctor-diagram \
@@ -33,15 +39,6 @@ ASCIIDOCTOR_OPTS := --attribute=mathematical-format=svg \
# Source directory
SRCDIR := ../src
-# LaTeX source and related files
-#SRCS := $(wildcard $(SRCDIR)/latex/*.tex)
-#FIGS := $(wildcard $(SRCDIR)/latex/figs/*)
-#BIBS := $(SRCDIR)/latex/riscv-spec.bib
-
-# LaTeX build tools
-#PDFLATEX := TEXINPUTS=$(SRCDIR)/latex: pdflatex -interaction=nonstopmode -halt-on-error
-#BIBTEX := BIBINPUTS=$(SRCDIR)/latex: bibtex
-
# Temporary files to clean up for LaTeX build
JUNK := *.pdf *.aux *.log *.bbl *.blg *.toc *.out *.fdb_latexmk *.fls *.synctex.gz
@@ -77,15 +74,6 @@ unpriv-isa-asciidoc.html: $(SRCDIR)/riscv-unprivileged.adoc
@echo "Building Unprivileged ISA HTML"
asciidoctor $(ASCIIDOCTOR_OPTS) --out-file=$@ $<
-# LaTeX build for Privileged ISA
-#priv-latex: riscv-privileged.pdf
-
-#riscv-privileged.pdf: $(SRCDIR)/latex/riscv-privileged.tex $(SRCS) $(FIGS) $(BIBS)
-# $(PDFLATEX) riscv-privileged
-# $(BIBTEX) riscv-privileged
-# $(PDFLATEX) riscv-privileged
-# $(PDFLATEX) riscv-privileged
-
clean:
@if [ -f priv-isa-asciidoc.pdf ]; then \
echo "Removing priv-isa-asciidoc.pdf"; \
@@ -103,11 +91,4 @@ clean:
echo "Removing unpriv-isa-asciidoc.html"; \
rm -f unpriv-isa-asciidoc.html; \
fi
-# @echo "Cleaning up files from LaTeX build"
-# @cd $(SRCDIR)/latex; \
-# for file in $(JUNK); do \
-# if [ -f "$$file" ]; then \
-# echo "Removing $$file"; \
-# rm -f "$$file"; \
-# fi; \
-# done
+
diff --git a/dependencies/Gemfile b/dependencies/Gemfile
index 8cf7a50..f347221 100644
--- a/dependencies/Gemfile
+++ b/dependencies/Gemfile
@@ -2,6 +2,7 @@ source 'https://rubygems.org'
gem 'asciidoctor'
gem 'asciidoctor-bibtex'
gem 'asciidoctor-diagram'
+gem 'mathematical'
gem 'asciidoctor-mathematical'
gem 'asciidoctor-pdf'
gem 'citeproc-ruby'
diff --git a/marchid.md b/marchid.md
index c208fbf..82af726 100644
--- a/marchid.md
+++ b/marchid.md
@@ -52,3 +52,13 @@ AIRISC | Fraunhofer IMS | [AIRISC Support](mailto:airisc
Proteus | imec-DistriNet, KU Leuven | [Marton Bognar](mailto:marton.bognar@kuleuven.be) | 32 | https://github.com/proteus-core/proteus
VexRiscv | SpinalHDL | [Charles Papon](mailto:charles.papon.90@gmail.com) | 33 | https://github.com/SpinalHDL/VexRiscv
Shuttle | UC Berkeley | [Jerry Zhao](mailto:jerryz123@berkeley.edu) | 34 | https://github.com/ucb-bar/shuttle
+CV32E2 | OpenHW Group | [Davide Schiavone](mailto:davide@openhwgroup.org), OpenHW Group | 35 | https://github.com/openhwgroup/cve2
+CVW | OpenHW Group | [James Stine](mailto:james.stine@okstate.edu), OpenHW Group | 36 | https://github.com/openhwgroup/cvw
+Boa32 | Julian Scheffers | [Julian Scheffers](mailto:julian@scheffers.net) | 37 | https://github.com/robotman2412/boa-risc-v
+WIV64 | Jesús Sanz del Rey | [Jesús Sanz del Rey](mailto:jesussanz2003@gmail.com) | 38 | https://github.com/StartForKiller/WivCPU
+RV6 | Nikola Lukić | [Nikola Lukić](mailto:lukicn@protonmail.com) | 39 | https://github.com/kiclu/rv6
+ApogeoRV | Gabriele Tripi | [Gabriele Tripi](mailto:tripi.gabriele2002@gmail.com) | 40 | https://github.com/GabbedT/ApogeoRV
+MicroRV32 | AGRA, Group of Computer Architecture, University of Bremen | [RISC-V @ AGRA](mailto:riscv@informatik.uni-bremen.de) | 41 | https://github.com/agra-uni-bremen/microrv32
+QEMU | qemu.org | [QEMU Mailing List](mailto:qemu-riscv@nongnu.org) | 42 | https://qemu.org
+KianV | Hirosh Dabui | [Hirosh Dabui](mailto:hirosh@dabui.de) | 43 | https://github.com/splinedrive/kianRiscV
+Coreblocks | Kuźnia Rdzeni, University of Wrocław | [Coreblocks Team](mailto:coreblocks@cs.uni.wroc.pl) | 44 | https://github.com/kuznia-rdzeni/coreblocks
diff --git a/src/a-st-ext.adoc b/src/a-st-ext.adoc
index c402585..9fae7ab 100644
--- a/src/a-st-ext.adoc
+++ b/src/a-st-ext.adoc
@@ -62,10 +62,11 @@ if the reservation is still valid and the reservation set contains the
bytes being written. If the SC.W succeeds, the instruction writes the
word in _rs2_ to memory, and it writes zero to _rd_. If the SC.W fails,
the instruction does not write to memory, and it writes a nonzero value
-to _rd_. Regardless of success or failure, executing an SC.W instruction
-invalidates any reservation held by this hart. LR.D and SC.D act
-analogously on doublewords and are only available on RV64. For RV64,
-LR.W and SC.W sign-extend the value placed in _rd_.
+to _rd_. For the purposes of memory protection, a failed SC.W may be
+treated like a store. Regardless of success or failure, executing an
+SC.W instruction invalidates any reservation held by this hart. LR.D and
+SC.D act analogously on doublewords and are only available on RV64. For
+RV64, LR.W and SC.W sign-extend the value placed in _rd_.
[NOTE]
====
@@ -369,7 +370,19 @@ is not naturally aligned, an address-misaligned exception or an
access-fault exception will be generated. The access-fault exception can
be generated for a memory access that would otherwise be able to
complete except for the misalignment, if the misaligned access should
-not be emulated. The "Zam" extension, described in <<zam>>, relaxes this requirement and specifies the semantics of misaligned AMOs.
+not be emulated.
+
+The misaligned atomicity granule PMA, defined in Volume II of this manual,
+optionally relaxes this alignment requirement.
+If present, the misaligned atomicity granule PMA specifies the size
+of a misaligned atomicity granule, a power-of-two number of bytes.
+The misaligned atomicity granule PMA applies only to AMOs, loads and stores
+defined in the base ISAs, and loads and stores of no more than XLEN bits
+defined in the F, D, and Q extensions.
+For an instruction in that set, if all accessed bytes lie within the same
+misaligned atomicity granule, the instruction will not raise an exception for
+reasons of address alignment, and the instruction will give rise to only one
+memory operation for the purposes of RVWMO--i.e., it will execute atomically.
The operations supported are swap, integer add, bitwise AND, bitwise OR,
bitwise XOR, and signed and unsigned integer maximum and minimum.
diff --git a/src/c-st-ext.adoc b/src/c-st-ext.adoc
index 1c2f81d..4cc36cd 100644
--- a/src/c-st-ext.adoc
+++ b/src/c-st-ext.adoc
@@ -76,7 +76,7 @@ integer loads and stores.
====
RVC was designed under the constraint that each RVC instruction expands
-into a single 32-bit instruction in either the base ISA (RV32I/E, RV64I,
+into a single 32-bit instruction in either the base ISA (RV32I/E, RV64I/E,
or RV128I) or the F and D standard extensions where present. Adopting
this constraint has two main benefits:
@@ -185,7 +185,7 @@ ADDI4SPN instruction.
The RISC-V ABI was changed to make the frequently used registers map to
registers 'x8-x15'. This simplifies the decompression decoder by
having a contiguous naturally aligned set of register numbers, and is
-also compatible with the RV32E base ISA, which only has 16 integer
+also compatible with the RV32E and RV64E base ISAs, which only have 16 integer
registers.
====
Compressed register-based floating-point loads and stores also use the
@@ -298,7 +298,7 @@ registers.
==== Stack-Pointer-Based Loads and Stores
include::images/wavedrom/c-sp-load-store.adoc[]
-[c-sp-load-store]
+[[c-sp-load-store]]
//.Stack-Pointer-Based Loads and Stores--these instructions use the CI format.
These instructions use the CI format.
@@ -306,8 +306,7 @@ These instructions use the CI format.
C.LWSP loads a 32-bit value from memory into register _rd_. It computes
an effective address by adding the _zero_-extended offset, scaled by 4,
to the stack pointer, `x2`. It expands to `lw rd, offset(x2)`. C.LWSP is
-only valid when _rd_&#x2260;x0 the code
-points with _rd_=x0 are reserved.
+only valid when _rd_&#x2260;x0; the code points with _rd_=x0 are reserved.
C.LDSP is an RV64C/RV128C-only instruction that loads a 64-bit value
from memory into register _rd_. It computes its effective address by
@@ -336,14 +335,14 @@ _zero_-extended offset, scaled by 8, to the stack pointer, `x2`. It
expands to `fld rd, offset(x2)`.
include::images/wavedrom/c-sp-load-store-css.adoc[]
-[c-sp-load-store-css]
+[[c-sp-load-store-css]]
//.Stack-Pointer-Based Loads and Stores--these instructions use the CSS format.
These instructions use the CSS format.
C.SWSP stores a 32-bit value in register _rs2_ to memory. It computes an
effective address by adding the _zero_-extended offset, scaled by 4, to
-the stack pointer, `x2`. It expands to 'sw rs2, offset(x2)'.
+the stack pointer, `x2`. It expands to `sw rs2, offset(x2)`.
C.SDSP is an RV64C/RV128C-only instruction that stores a 64-bit value in
register _rs2_ to memory. It computes an effective address by adding the
@@ -421,7 +420,7 @@ _zero_-extended offset, scaled by 4, to the base address in register
`_rs1′_`. It expands to `lw rd′, offset(rs1′)`.
C.LD is an RV64C/RV128C-only instruction that loads a 64-bit value from
-memory into register `_rd′`. It computes an effective
+memory into register `_rd′_`. It computes an effective
address by adding the _zero_-extended offset, scaled by 8, to the base
address in register `_rs1′_`. It expands to
`ld rd′, offset(rs1′)`.
@@ -611,8 +610,8 @@ C.ADDI16SP shares the opcode with C.LUI, but has a destination field of
value in the stack pointer (`sp=x2`), where the immediate is scaled to
represent multiples of 16 in the range (-512,496). C.ADDI16SP is used to
adjust the stack pointer in procedure prologues and epilogues. It
-expands into `addi x2, x2, imm`. C.ADDI16SP is only valid when
-_imm_≠0; the code point with _imm_=0 is reserved.
+expands into `addi x2, x2, nzimm[9:4]`. C.ADDI16SP is only valid when
+_nzimm_≠0; the code point with _nzimm_=0 is reserved.
[NOTE]
====
@@ -628,8 +627,8 @@ C.ADDI4SPN is a CIW-format instruction that adds a _zero_-extended
non-zero immediate, scaled by 4, to the stack pointer, `x2`, and writes
the result to `rd′`. This instruction is used to generate
pointers to stack-allocated variables, and expands to
-`addi rd′, x2, uimm`. C.ADDI4SPN is only valid when
-_uimm_≠0; the code points with _uimm_=0 are
+`addi rd′, x2, nzuimm[9:2]`. C.ADDI4SPN is only valid when
+_nzuimm_≠0; the code points with _nzuimm_=0 are
reserved.
[[c-ci]]
@@ -641,7 +640,7 @@ C.SLLI is a CI-format instruction that performs a logical left shift of
the value in register _rd_ then writes the result to _rd_. The shift
amount is encoded in the _shamt_ field. For RV128C, a shift amount of
zero is used to encode a shift of 64. C.SLLI expands into
-`slli rd, rd, shamt`, except for RV128C with `shamt=0`, which expands to
+`slli rd, rd, shamt[5:0]`, except for RV128C with `shamt=0`, which expands to
`slli rd, rd, 64`.
For RV32C, _shamt[5]_ must be zero; the code points with _shamt[5]_=1
@@ -705,7 +704,7 @@ include::images/wavedrom/c-int-reg-to-reg-cr-format.adoc[]
These instructions use the CR format.
C.MV copies the value in register _rs2_ into register _rd_. C.MV expands
-into _add rd, x0, rs2_. C.MV is only valid when
+into `add rd, x0, rs2`. C.MV is only valid when
`rs2≠x0` the code points with `rs2=x0` correspond to the C.JR instruction. The code points with `rs2≠x0` and `rd=x0` are HINTs.
[TIP]
@@ -718,7 +717,7 @@ hardware cost._
====
C.ADD adds the values in registers _rd_ and _rs2_ and writes the result
-to register _rd_. C.ADD expands into _add rd, rd, rs2_. C.ADD is only
+to register _rd_. C.ADD expands into `add rd, rd, rs2`. C.ADD is only
valid when `rs2≠x0` the code points with `rs2=x0` correspond to the C.JALR
and C.EBREAK instructions. The code points with `rs2≠x0` and rd=x0 are HINTs.
@@ -872,7 +871,7 @@ no standard HINTs will ever be defined in this subspace.
|===
|Instruction |Constraints |Code Points |Purpose
-|C.NOP |_imm_≠0 |63 .6+.^|_Reserved for future standard use_
+|C.NOP |_imm_≠0 |63 .6+.^|_Designated for future standard use_
|C.ADDI | _rd_≠`x0`, _imm_=0 |31
@@ -968,4 +967,4 @@ include::images/bytefield/rvc-instr-quad1.adoc[]
[[rvc-instr-table2]]
.Instruction listing for RVC, Quadrant 2
include::images/bytefield/rvc-instr-quad2.adoc[]
-//include::images/bytefield/rvc-instr-quad2.png[] \ No newline at end of file
+//include::images/bytefield/rvc-instr-quad2.png[]
diff --git a/src/calling-convention.adoc b/src/calling-convention.adoc
new file mode 100644
index 0000000..f5cb079
--- /dev/null
+++ b/src/calling-convention.adoc
@@ -0,0 +1,29 @@
+[appendix]
+== Calling Convention for Vector State (Not authoritative - Placeholder Only)
+
+NOTE: This Appendix is only a placeholder to help explain the
+conventions used in the code examples, and is not considered frozen or
+part of the ratification process. The official RISC-V psABI document
+is being expanded to specify the vector calling conventions.
+
+In the RISC-V psABI, the vector registers `v0`-`v31` are all caller-saved.
+The `vl` and `vtype` CSRs are also caller-saved.
+
+Procedures may assume that `vstart` is zero upon entry. Procedures may
+assume that `vstart` is zero upon return from a procedure call.
+
+NOTE: Application software should normally not write `vstart` explicitly.
+Any procedure that does explicitly write `vstart` to a nonzero value must
+zero `vstart` before either returning or calling another procedure.
+
+The `vxrm` and `vxsat` fields of `vcsr` have thread storage duration.
+
+Executing a system call causes all caller-saved vector registers
+(`v0`-`v31`, `vl`, `vtype`) and `vstart` to become unspecified.
+
+NOTE: This scheme allows system calls that cause context switches to avoid
+saving and later restoring the vector registers.
+
+NOTE: Most OSes will choose to either leave these registers intact or reset
+them to their initial state to avoid leaking information across process
+boundaries.
diff --git a/src/cmo.adoc b/src/cmo.adoc
new file mode 100644
index 0000000..705166a
--- /dev/null
+++ b/src/cmo.adoc
@@ -0,0 +1,1130 @@
+[[cmo]]
+== Base Cache Management Operation ISA Extensions
+
+=== Pseudocode for instruction semantics
+
+The semantics of each instruction in the <<#insns>> chapter is expressed in a
+SAIL-like syntax.
+
+[#intro,reftext="Introduction"]
+=== Introduction
+
+_Cache-management operation_ (or _CMO_) instructions perform operations on
+copies of data in the memory hierarchy. In general, CMO instructions operate on
+cached copies of data, but in some cases, a CMO instruction may operate on
+memory locations directly. Furthermore, CMO instructions are grouped by
+operation into the following classes:
+
+* A _management_ instruction manipulates cached copies of data with respect to a
+ set of agents that can access the data
+* A _zero_ instruction zeros out a range of memory locations, potentially
+ allocating cached copies of data in one or more caches
+* A _prefetch_ instruction indicates to hardware that data at a given memory
+ location may be accessed in the near future, potentially allocating cached
+ copies of data in one or more caches
+
+This document introduces a base set of CMO ISA extensions that operate
+specifically on cache blocks or the memory locations corresponding to a cache
+block; these are known as _cache-block operation_ (or _CBO_) instructions. Each
+of the above classes of instructions represents an extension in this
+specification:
+
+* The _Zicbom_ extension defines a set of cache-block management instructions:
+ `CBO.INVAL`, `CBO.CLEAN`, and `CBO.FLUSH`
+* The _Zicboz_ extension defines a cache-block zero instruction: `CBO.ZERO`
+* The _Zicbop_ extension defines a set of cache-block prefetch instructions:
+ `PREFETCH.R`, `PREFETCH.W`, and `PREFETCH.I`
+
+The execution behavior of the above instructions is also modified by CSR state
+added by this specification.
+
+The remainder of this document provides general background information on CMO
+instructions and describes each of the above ISA extensions.
+
+[NOTE]
+====
+_The term CMO encompasses all operations on caches or resources related to
+caches. The term CBO represents a subset of CMOs that operate only on cache
+blocks. The first CMO extensions only define CBOs._
+====
+
+[#background,reftext="Background"]
+=== Background
+
+This chapter provides information common to all CMO extensions.
+
+[#memory-caches,reftext="Memory and Caches"]
+==== Memory and Caches
+
+A _memory location_ is a physical resource in a system uniquely identified by a
+_physical address_. An _agent_ is a logic block, such as a RISC-V hart,
+accelerator, I/O device, etc., that can access a given memory location.
+
+[NOTE]
+====
+_A given agent may not be able to access all memory locations in a system, and
+two different agents may or may not be able to access the same set of memory
+locations._
+====
+
+A _load operation_ (or _store operation_) is performed by an agent to consume
+(or modify) the data at a given memory location. Load and store operations are
+performed as a result of explicit memory accesses to that memory location.
+Additionally, a _read transfer_ from memory fetches the data at the memory
+location, while a _write transfer_ to memory updates the data at the memory
+location.
+
+A _cache_ is a structure that buffers copies of data to reduce average memory
+latency. Any number of caches may be interspersed between an agent and a memory
+location, and load and store operations from an agent may be satisfied by a
+cache instead of the memory location.
+
+[NOTE]
+====
+_Load and store operations are decoupled from read and write transfers by
+caches. For example, a load operation may be satisfied by a cache without
+performing a read transfer from memory, or a store operation may be satisfied by
+a cache that first performs a read transfer from memory._
+====
+
+Caches organize copies of data into _cache blocks_, each of which represents a
+contiguous, naturally aligned power-of-two (or _NAPOT_) range of memory
+locations. A cache block is identified by a physical address corresponding to
+the underlying memory locations. The capacity and organization of a cache and
+the size of a cache block are both _implementation-specific_, and the execution
+environment provides software a means to discover information about the caches
+and cache blocks in a system. In the initial set of CMO extensions, the size of
+a cache block shall be uniform throughout the system.
+
+[NOTE]
+====
+_In future CMO extensions, the requirement for a uniform cache block size may be
+relaxed._
+====
+
+Implementation techniques such as speculative execution or hardware prefetching
+may cause a given cache to allocate or deallocate a copy of a cache block at any
+time, provided the corresponding physical addresses are accessible according to
+the supported access type PMA and are cacheable according to the cacheability
+PMA. Allocating a copy of a cache block results in a read transfer from another
+cache or from memory, while deallocating a copy of a cache block may result in a
+write transfer to another cache or to memory depending on whether the data in
+the copy were modified by a store operation. Additional details are discussed in
+<<#coherent-agents-caches>>.
+
+==== Cache-Block Operations
+
+A CBO instruction causes one or more operations to be performed on the cache
+blocks identified by the instruction. In general, a CBO instruction may identify
+one or more cache blocks; however, in the initial set of CMO extensions, CBO
+instructions identify a single cache block only.
+
+A cache-block management instruction performs one of the following operations,
+relative to the copy of a given cache block allocated in a given cache:
+
+* An _invalidate operation_ deallocates the copy of the cache block
+
+* A _clean operation_ performs a write transfer to another cache or to memory if
+ the data in the copy of the cache block have been modified by a store
+ operation
+
+* A _flush operation_ atomically performs a clean operation followed by an
+ invalidate operation
+
+Additional details, including the actual operation performed by a given
+cache-block management instruction, are described in <<#Zicbom>>.
+
+A cache-block zero instruction performs a set of store operations that write
+zeros to the set of bytes corresponding to a cache block. Unless specified
+otherwise, the store operations generated by a cache-block zero instruction have
+the same general properties and behaviors that other store instructions in the
+architecture have. An implementation may or may not update the entire set of
+bytes atomically with a single store operation. Additional details are described
+in <<#Zicboz>>.
+
+A cache-block prefetch instruction is a HINT to the hardware that software
+expects to perform a particular type of memory access in the near future.
+Additional details are described in <<#Zicbop>>.
+
+[#coherent-agents-caches,reftext="Coherent Agents and Caches"]
+=== Coherent Agents and Caches
+
+For a given memory location, a _set of coherent agents_ consists of the agents
+for which all of the following hold:
+
+* Store operations from all agents in the set appear to be serialized with
+ respect to each other
+* Store operations from all agents in the set eventually appear to all other
+ agents in the set
+* A load operation from an agent in the set returns data from a store operation
+ from an agent in the set (or from the initial data in memory)
+
+The coherent agents within such a set shall access a given memory location with
+the same physical address and the same physical memory attributes; however, if
+the coherence PMA for a given agent indicates a given memory location is not
+coherent, that agent shall not be a member of a set of coherent agents with any
+other agent for that memory location and shall be the sole member of a set of
+coherent agents consisting of itself.
+
+An agent who is a member of a set of coherent agents is said to be _coherent_
+with respect to the other agents in the set. On the other hand, an agent who is
+_not_ a member is said to be _non-coherent_ with respect to the agents in the
+set.
+
+Caches introduce the possibility that multiple copies of a given cache block may
+be present in a system at the same time. An _implementation-specific_ mechanism
+keeps these copies coherent with respect to the load and store operations from
+the agents in the set of coherent agents. Additionally, if a coherent agent in
+the set executes a CBO instruction that specifies the cache block, the resulting
+operation shall apply to any and all of the copies in the caches that can be
+accessed by the load and store operations from the coherent agents.
+
+[NOTE]
+====
+_An operation from a CBO instruction is defined to operate only on the copies of
+a cache block that are cached in the caches accessible by the explicit memory
+accesses performed by the set of coherent agents. This includes copies of a
+cache block in caches that are accessed only indirectly by load and store
+operations, e.g. coherent instruction caches._
+====
+
+The set of caches subject to the above mechanism form a _set of coherent
+caches_, and each coherent cache has the following behaviors, assuming all
+operations are performed by the agents in a set of coherent agents:
+
+* A coherent cache is permitted to allocate and deallocate copies of a cache
+ block and perform read and write transfers as described in <<#memory-caches>>
+
+* A coherent cache is permitted to perform a write transfer to memory provided
+ that a store operation has modified the data in the cache block since the most
+ recent invalidate, clean, or flush operation on the cache block
+
+* At least one coherent cache is responsible for performing a write transfer to
+ memory once a store operation has modified the data in the cache block until
+ the next invalidate, clean, or flush operation on the cache block, after which
+ no coherent cache is responsible for performing (or permitted to perform) a
+ write transfer to memory until the next store operation has modified the data
+ in the cache block
+
+* A coherent cache is required to perform a write transfer to memory if a store
+ operation has modified the data in the cache block since the most recent
+ invalidate, clean, or flush operation on the cache block and if the next clean
+ or flush operation requires a write transfer to memory
+
+[NOTE]
+====
+_The above restrictions ensure that a "clean" copy of a cache block, fetched by
+a read transfer from memory and unmodified by a store operation, cannot later
+overwrite the copy of the cache block in memory updated by a write transfer to
+memory from a non-coherent agent._
+====
+
+A non-coherent agent may initiate a cache-block operation that operates on the
+set of coherent caches accessed by a set of coherent agents. The mechanism to
+perform such an operation is _implementation-specific_.
+
+==== Memory Ordering
+
+===== Preserved Program Order
+
+The preserved program order (abbreviated _PPO_) rules are defined by the RVWMO
+memory ordering model. How the operations resulting from CMO instructions fit
+into these rules is described below.
+
+For cache-block management instructions, the resulting invalidate, clean, and
+flush operations behave as stores in the PPO rules subject to one additional
+overlapping address rule. Specifically, if _a_ precedes _b_ in program order,
+then _a_ will precede _b_ in the global memory order if:
+
+* _a_ is an invalidate, clean, or flush, _b_ is a load, and _a_ and _b_ access
+ overlapping memory addresses
+
+[NOTE]
+====
+_The above rule ensures that a subsequent load in program order never appears
+in the global memory order before a preceding invalidate, clean, or flush
+operation to an overlapping address._
+====
+
+Additionally, invalidate, clean, and flush operations are classified as W or O
+(depending on the physical memory attributes for the corresponding physical
+addresses) for the purposes of predecessor and successor sets in `FENCE`
+instructions. These operations are _not_ ordered by other instructions that
+order stores, e.g. `FENCE.I` and `SFENCE.VMA`.
+
+For cache-block zero instructions, the resulting store operations behave as
+stores in the PPO rules and are ordered by other instructions that order stores.
+
+Finally, for cache-block prefetch instructions, the resulting operations are
+_not_ ordered by the PPO rules nor are they ordered by any other ordering
+instructions.
+
+===== Load Values
+
+An invalidate operation may change the set of values that can be returned by a
+load. In particular, an additional condition is added to the Load Value Axiom:
+
+* If an invalidate operation _i_ precedes a load _r_ and operates on a byte _x_
+ returned by _r_, and no store to _x_ appears between _i_ and _r_ in program
+ order or in the global memory order, then _r_ returns any of the following
+ values for _x_:
+
+. If no clean or flush operations on _x_ precede _i_ in the global memory order,
+ either the initial value of _x_ or the value of any store to _x_ that precedes
+ _i_
+
+. If no store to _x_ precedes a clean or flush operation on _x_ in the global
+ memory order and if the clean or flush operation on _x_ precedes _i_ in the
+ global memory order, either the initial value of _x_ or the value of any store
+ to _x_ that precedes _i_
+
+. If a store to _x_ precedes a clean or flush operation on _x_ in the global
+ memory order and if the clean or flush operation on _x_ precedes _i_ in the
+ global memory order, either the value of the latest store to _x_ that precedes
+ the latest clean or flush operation on _x_ or the value of any store to _x_
+ that both precedes _i_ and succeeds the latest clean or flush operation on _x_
+ that precedes _i_
+
+. The value of any store to _x_ by a non-coherent agent regardless of the above
+ conditions
+
+[NOTE]
+====
+_The first three items describe the possible load values at different points
+in the global memory order relative to clean or flush operations. The final
+item implies that the load value may be produced by a non-coherent agent at
+any time._
+====
+
+==== Traps
+
+Execution of certain CMO instructions may result in traps due to CSR state,
+described in the <<#csr_state>> section, or due to the address translation and
+protection mechanisms. The trapping behavior of CMO instructions is described in
+the following sections.
+
+===== Illegal Instruction and Virtual Instruction Exceptions
+
+Cache-block management instructions and cache-block zero instructions may raise
+illegal instruction exceptions or virtual instruction exceptions depending on
+the current privilege mode and the state of the CMO control registers described
+in the <<#csr_state>> section.
+
+Cache-block prefetch instructions raise neither illegal instruction exceptions
+nor virtual instruction exceptions.
+
+===== Page Fault, Guest-Page Fault, and Access Fault Exceptions
+
+Similar to load and store instructions, CMO instructions are explicit memory
+access instructions that compute an effective address. The effective address is
+ultimately translated into a physical address based on the privilege mode and
+the enabled translation mechanisms, and the CMO extensions impose the following
+constraints on the physical addresses in a given cache block:
+
+* The PMP access control bits shall be the same for _all_ physical addresses in
+ the cache block, and if write permission is granted by the PMP access control
+ bits, read permission shall also be granted
+
+* The PMAs shall be the same for _all_ physical addresses in the cache block,
+ and if write permission is granted by the supported access type PMAs, read
+ permission shall also be granted
+
+If the above constraints are not met, the behavior of a CBO instruction is
+UNSPECIFIED.
+
+[NOTE]
+====
+_This specification assumes that the above constraints will typically be met for
+main memory regions and may be met for certain I/O regions._
+====
+
+The Zicboz extension introduces an additional supported access type PMA for
+cache-block zero instructions. Main memory regions are required to support
+accesses by cache-block zero instructions; however, I/O regions may specify
+whether accesses by cache-block zero instructions are supported.
+
+A cache-block management instruction is permitted to access the specified cache
+block whenever a load instruction or store instruction is permitted to access
+the corresponding physical addresses. If neither a load instruction nor store
+instruction is permitted to access the physical addresses, but an instruction
+fetch is permitted to access the physical addresses, whether a cache-block
+management instruction is permitted to access the cache block is UNSPECIFIED. If
+access to the cache block is not permitted, a cache-block management instruction
+raises a store page fault or store guest-page fault exception if address
+translation does not permit any access or raises a store access fault exception
+otherwise. During address translation, the instruction also checks the accessed
+bit and may either raise an exception or set the bit as required.
+
+[NOTE]
+====
+_The interaction between cache-block management instructions and instruction
+fetches will be specified in a future extension._
+
+_As implied by omission, a cache-block management instruction does not check the
+dirty bit and neither raises an exception nor sets the bit._
+====
+
+A cache-block zero instruction is permitted to access the specified cache block
+whenever a store instruction is permitted to access the corresponding physical
+addresses and when the PMAs indicate that cache-block zero instructions are a
+supported access type. If access to the cache block is not permitted, a
+cache-block zero instruction raises a store page fault or store guest-page fault
+exception if address translation does not permit write access or raises a store
+access fault exception otherwise. During address translation, the instruction
+also checks the accessed and dirty bits and may either raise an exception or set
+the bits as required.
+
+A cache-block prefetch instruction is permitted to access the specified cache
+block whenever a load instruction, store instruction, or instruction fetch is
+permitted to access the corresponding physical addresses. If access to the cache
+block is not permitted, a cache-block prefetch instruction does not raise any
+exceptions and shall not access any caches or memory. During address
+translation, the instruction does _not_ check the accessed and dirty bits and
+neither raises an exception nor sets the bits.
+
+[NOTE]
+====
+_Like a load or store instruction, a CMO instruction may or may not be permitted
+to access a cache block based on the states of the `MPRV`, `MPV`, and `MPP` bits
+in `mstatus` and the `SUM` and `MXR` bits in `mstatus`, `sstatus`, and
+`vsstatus`._
+
+_This specification expects that implementations will process cache-block
+management instructions like store/AMO instructions, so store/AMO exceptions are
+appropriate for these instructions, regardless of the permissions required._
+====
+
+===== Address Misaligned Exceptions
+
+CMO instructions do _not_ generate address misaligned exceptions.
+
+===== Breakpoint Exceptions and Debug Mode Entry
+
+Unless otherwise defined by the debug architecture specification, the behavior
+of trigger modules with respect to CMO instructions is UNSPECIFIED.
+
+[NOTE]
+====
+_For the Zicbom, Zicboz, and Zicbop extensions, this specification recommends
+the following common trigger module behaviors:_
+
+* Type 6 address match triggers, i.e. `tdata1.type=6` and `mcontrol6.select=0`,
+ should be supported
+
+* Type 2 address/data match triggers, i.e. `tdata1.type=2`, should be
+ unsupported
+
+* The size of a memory access equals the size of the cache block accessed, and
+ the compare values follow from the addresses of the NAPOT memory region
+ corresponding to the cache block containing the effective address
+
+* Unless an encoding for a cache block is added to the `mcontrol6.size` field,
+ an address trigger should only match a memory access from a CBO instruction if
+ `mcontrol6.size=0`
+
+_If the Zicbom extension is implemented, this specification recommends the
+following additional trigger module behaviors:_
+
+* Implementing address match triggers should be optional
+
+* Type 6 data match triggers, i.e. `tdata1.type=6` and `mcontrol6.select=1`,
+ should be unsupported
+
+* Memory accesses are considered to be stores, i.e. an address trigger matches
+ only if `mcontrol6.store=1`
+
+_If the Zicboz extension is implemented, this specification recommends the
+following additional trigger module behaviors:_
+
+* Implementing address match triggers should be mandatory
+
+* Type 6 data match triggers, i.e. `tdata1.type=6` and `mcontrol6.select=1`,
+ should be supported, and implementing these triggers should be optional
+
+* Memory accesses are considered to be stores, i.e. an address trigger matches
+ only if `mcontrol6.store=1`
+
+_If the Zicbop extension is implemented, this specification recommends the
+following additional trigger module behaviors:_
+
+* Implementing address match triggers should be optional
+
+* Type 6 data match triggers, i.e. `tdata1.type=6` and `mcontrol6.select=1`,
+ should be unsupported
+
+* Memory accesses may be considered to be loads or stores depending on the
+ implementation, i.e. whether an address trigger matches on these instructions
+ when `mcontrol6.load=1` or `mcontrol6.store=1` is _implementation-specific_
+
+_This specification also recommends that the behavior of trigger modules with
+respect to the Zicboz extension should be defined in version 1.0 of the debug
+architecture specification. The behavior of trigger modules with respect to the
+Zicbom and Zicbop extensions is expected to be defined in future extensions._
+====
+
+===== Hypervisor Extension
+
+For the purposes of writing the `mtinst` or `htinst` register on a trap, the
+following standard transformation is defined for cache-block management
+instructions and cache-block zero instructions:
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 7, name: 'opcode'},
+ { bits: 5, name: 0x0 },
+ { bits: 3, name: 'funct3'},
+ { bits: 5, name: 0x0},
+ { bits: 12, name: 'operation'},
+]}
+....
+
+The `operation` field corresponds to the 12 most significant bits of the
+trapping instruction.
+
+[NOTE]
+====
+_As described in the hypervisor extension, a zero may be written into `mtinst`
+or `htinst` instead of the standard transformation defined above._
+====
+
+==== Effects on Constrained LR/SC Loops
+
+The following event is added to the list of events that satisfy the eventuality
+guarantee provided by constrained LR/SC loops, as defined in the A extension:
+
+* Some other hart executes a cache-block management instruction or a cache-block
+ zero instruction to the reservation set of the LR instruction in _H_'s
+ constrained LR/SC loop.
+
+[NOTE]
+====
+_The above event has been added to accommodate cache coherence protocols that
+cannot distinguish between invalidations for stores and invalidations for
+cache-block management operations._
+
+_Aside from the above event, CMO instructions neither change the properties of
+constrained LR/SC loops nor modify the eventuality guarantee provided by them.
+For example, executing a CMO instruction may cause a constrained LR/SC loop on
+any hart to fail periodically or may cause an unconstrained LR/SC sequence on the
+same hart to fail always. Additionally, executing a cache-block prefetch
+instruction does not impact the eventuality guarantee provided by constrained
+LR/SC loops executed on any hart._
+====
+
+==== Software Discovery
+
+The initial set of CMO extensions requires the following information to be
+discovered by software:
+
+* The size of the cache block for management and prefetch instructions
+* The size of the cache block for zero instructions
+* CBIE support at each privilege level
+
+Other general cache characteristics may also be specified in the discovery
+mechanism.
+
+[#csr_state,reftext="Control and Status Register State"]
+=== Control and Status Register State
+
+[NOTE]
+====
+_The CMO extensions rely on state in {csrname} CSRs that will be defined in a
+future update to the privileged architecture. If this CSR update is not
+ratified, the CMO extension will define its own CSRs._
+====
+
+Three CSRs control the execution of CMO instructions:
+
+* `m{csrname}`
+* `s{csrname}`
+* `h{csrname}`
+
+The `s{csrname}` register is used by all supervisor modes, including VS-mode. A
+hypervisor is responsible for saving and restoring `s{csrname}` on guest context
+switches. The `h{csrname}` register is only present if the H-extension is
+implemented and enabled.
+
+Each `x{csrname}` register (where `x` is `m`, `s`, or `h`) has the following
+generic format:
+
+.Generic Format for x{csrname} CSRs
+[cols="^10,^10,80a"]
+|===
+| Bits | Name | Description
+
+| [5:4] | `CBIE` | Cache Block Invalidate instruction Enable
+
+Enables the execution of the cache block invalidate instruction, `CBO.INVAL`, in
+a lower privilege mode:
+
+* `00`: The instruction raises an illegal instruction or virtual instruction
+ exception
+* `01`: The instruction is executed and performs a flush operation
+* `10`: _Reserved_
+* `11`: The instruction is executed and performs an invalidate operation
+
+| [6] | `CBCFE` | Cache Block Clean and Flush instruction Enable
+
+Enables the execution of the cache block clean instruction, `CBO.CLEAN`, and the
+cache block flush instruction, `CBO.FLUSH`, in a lower privilege mode:
+
+* `0`: The instruction raises an illegal instruction or virtual instruction
+ exception
+* `1`: The instruction is executed
+
+| [7] | `CBZE` | Cache Block Zero instruction Enable
+
+Enables the execution of the cache block zero instruction, `CBO.ZERO`, in a
+lower privilege mode:
+
+* `0`: The instruction raises an illegal instruction or virtual instruction
+ exception
+* `1`: The instruction is executed
+
+|===
+
+The `x{csrname}` registers control CBO instruction execution based on the current
+privilege mode and the state of the appropriate CSRs, as detailed below.
+
+A `CBO.INVAL` instruction executes or raises either an illegal instruction
+exception or a virtual instruction exception based on the state of the
+`x{csrname}.CBIE` fields:
+
+[source,sail,subs="attributes+"]
+--
+
+// illegal instruction exceptions
+if (((priv_mode != M) && (m{csrname}.CBIE == 00)) ||
+ ((priv_mode == U) && (s{csrname}.CBIE == 00)))
+{
+ <raise illegal instruction exception>
+}
+// virtual instruction exceptions
+else if (((priv_mode == VS) && (h{csrname}.CBIE == 00)) ||
+ ((priv_mode == VU) && ((h{csrname}.CBIE == 00) || (s{csrname}.CBIE == 00))))
+{
+ <raise virtual instruction exception>
+}
+// execute instruction
+else
+{
+ if (((priv_mode != M) && (m{csrname}.CBIE == 01)) ||
+ ((priv_mode == U) && (s{csrname}.CBIE == 01)) ||
+ ((priv_mode == VS) && (h{csrname}.CBIE == 01)) ||
+ ((priv_mode == VU) && ((h{csrname}.CBIE == 01) || (s{csrname}.CBIE == 01))))
+ {
+ <execute CBO.INVAL and perform flush operation>
+ }
+ else
+ {
+ <execute CBO.INVAL and perform invalidate operation>
+ }
+}
+
+
+--
+
+[NOTE]
+====
+_Until a modified cache block has updated memory, a `CBO.INVAL` instruction may
+expose stale data values in memory if the CSRs are programmed to perform an
+invalidate operation. This behavior may result in a security hole if lower
+privileged level software performs an invalidate operation and accesses
+sensitive information in memory._
+
+_To avoid such holes, higher privileged level software must perform either a
+clean or flush operation on the cache block before permitting lower privileged
+level software to perform an invalidate operation on the block. Alternatively,
+higher privileged level software may program the CSRs so that `CBO.INVAL`
+either traps or performs a flush operation in a lower privileged level._
+====
+
+A `CBO.CLEAN` or `CBO.FLUSH` instruction executes or raises an illegal
+instruction or virtual instruction exception based on the state of the
+`x{csrname}.CBCFE` bits:
+
+[source,sail,subs="attributes+"]
+--
+
+// illegal instruction exceptions
+if (((priv_mode != M) && !m{csrname}.CBCFE) ||
+ ((priv_mode == U) && !s{csrname}.CBCFE))
+{
+ <raise illegal instruction exception>
+}
+// virtual instruction exceptions
+else if (((priv_mode == VS) && !h{csrname}.CBCFE) ||
+ ((priv_mode == VU) && !(h{csrname}.CBCFE && s{csrname}.CBCFE)))
+{
+ <raise virtual instruction exception>
+}
+// execute instruction
+else
+{
+ <execute CBO.CLEAN or CBO.FLUSH>
+}
+
+--
+
+Finally, a `CBO.ZERO` instruction executes or raises an illegal instruction or
+virtual instruction exception based on the state of the `x{csrname}.CBZE` bits:
+
+[source,sail,subs="attributes+"]
+--
+
+// illegal instruction exceptions
+if (((priv_mode != M) && !m{csrname}.CBZE) ||
+ ((priv_mode == U) && !s{csrname}.CBZE))
+{
+ <raise illegal instruction exception>
+}
+// virtual instruction exceptions
+else if (((priv_mode == VS) && !h{csrname}.CBZE) ||
+ ((priv_mode == VU) && !(h{csrname}.CBZE && s{csrname}.CBZE)))
+{
+ <raise virtual instruction exception>
+}
+// execute instruction
+else
+{
+ <execute CBO.ZERO>
+}
+
+--
+
+Each `x{csrname}` register is WARL; however, software should determine the legal
+values from the execution environment discovery mechanism.
+
+[#extensions,reftext="Extensions"]
+=== Extensions
+
+CMO instructions are defined in the following extensions:
+
+* <<#Zicbom>>
+* <<#Zicboz>>
+* <<#Zicbop>>
+
+[#Zicbom,reftext="Cache-Block Management Instructions"]
+==== Cache-Block Management Instructions
+
+Cache-block management instructions enable software running on a set of coherent
+agents to communicate with a set of non-coherent agents by performing one of the
+following operations:
+
+* An invalidate operation makes data from store operations performed by a set of
+ non-coherent agents visible to the set of coherent agents at a point common to
+ both sets by deallocating all copies of a cache block from the set of coherent
+ caches up to that point
+
+* A clean operation makes data from store operations performed by the set of
+ coherent agents visible to a set of non-coherent agents at a point common to
+ both sets by performing a write transfer of a copy of a cache block to that
+ point provided a coherent agent performed a store operation that modified the
+ data in the cache block since the previous invalidate, clean, or flush
+ operation on the cache block
+
+* A flush operation atomically performs a clean operation followed by an
+ invalidate operation
+
+In the Zicbom extension, the instructions operate to a point common to _all_
+agents in the system. In other words, an invalidate operation ensures that store
+operations from all non-coherent agents are visible to agents in the set of
+coherent agents, and a clean operation ensures that store operations from
+coherent agents are visible to all non-coherent agents.
+
+[NOTE]
+====
+_The Zicbom extension does not prohibit agents that fall outside of the above
+architectural definition; however, software cannot rely on the defined cache
+operations to have the desired effects with respect to those agents._
+
+_Future extensions may define different sets of agents for the purposes of
+performance optimization._
+====
+
+These instructions operate on the cache block whose effective address is
+specified in _rs1_. The effective address is translated into a corresponding
+physical address by the appropriate translation mechanisms.
+
+The following instructions comprise the Zicbom extension:
+
+[%header,cols="^1,^1,4,8"]
+|===
+|RV32
+|RV64
+|Mnemonic
+|Instruction
+
+|&#10003;
+|&#10003;
+|cbo.clean _base_
+|<<#insns-cbo_clean>>
+
+|&#10003;
+|&#10003;
+|cbo.flush _base_
+|<<#insns-cbo_flush>>
+
+|&#10003;
+|&#10003;
+|cbo.inval _base_
+|<<#insns-cbo_inval>>
+
+|===
+
+[#Zicboz,reftext="Cache-Block Zero Instructions"]
+==== Cache-Block Zero Instructions
+
+Cache-block zero instructions store zeros to the set of bytes corresponding to a
+cache block. An implementation may update the bytes in any order and with any
+granularity and atomicity, including individual bytes.
+
+[NOTE]
+====
+_Cache-block zero instructions store zeros independently of whether data from
+the underlying memory locations are cacheable. In addition, this specification
+does not constrain how the bytes are written._
+====
+
+These instructions operate on the cache block, or the memory locations
+corresponding to the cache block, whose effective address is specified in _rs1_.
+The effective address is translated into a corresponding physical address by the
+appropriate translation mechanisms.
+
+The following instructions comprise the Zicboz extension:
+
+[%header,cols="^1,^1,4,8"]
+|===
+|RV32
+|RV64
+|Mnemonic
+|Instruction
+
+|&#10003;
+|&#10003;
+|cbo.zero _base_
+|<<#insns-cbo_zero>>
+
+|===
+
+[#Zicbop,reftext="Cache-Block Prefetch Instructions"]
+==== Cache-Block Prefetch Instructions
+
+Cache-block prefetch instructions are HINTs to the hardware to indicate that
+software intends to perform a particular type of memory access in the near
+future. The types of memory accesses are instruction fetch, data read (i.e.
+load), and data write (i.e. store).
+
+These instructions operate on the cache block whose effective address is the sum
+of the base address specified in _rs1_ and the sign-extended offset encoded in
+_imm[11:0]_, where _imm[4:0]_ shall equal `0b00000`. The effective address is
+translated into a corresponding physical address by the appropriate translation
+mechanisms.
+
+[NOTE]
+====
+_Cache-block prefetch instructions are encoded as ORI instructions with rd equal
+to `0b00000`; however, for the purposes of effective address calculation, this
+field is also interpreted as imm[4:0] like a store instruction._
+====
+
+The following instructions comprise the Zicbop extension:
+
+[%header,cols="^1,^1,4,8"]
+|===
+|RV32
+|RV64
+|Mnemonic
+|Instruction
+
+|&#10003;
+|&#10003;
+|prefetch.i _offset_(_base_)
+|<<#insns-prefetch_i>>
+
+|&#10003;
+|&#10003;
+|prefetch.r _offset_(_base_)
+|<<#insns-prefetch_r>>
+
+|&#10003;
+|&#10003;
+|prefetch.w _offset_(_base_)
+|<<#insns-prefetch_w>>
+
+|===
+
+[#insns,reftext="Instructions"]
+=== Instructions
+
+[#insns-cbo_clean,reftext="Cache Block Clean"]
+==== cbo.clean
+
+Synopsis::
+Perform a clean operation on a cache block
+
+Mnemonic::
+cbo.clean _offset_(_base_)
+
+Encoding::
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 7, name: 0xF, attr: ['MISC-MEM'] },
+ { bits: 5, name: 0x0 },
+ { bits: 3, name: 0x2, attr: ['CBO'] },
+ { bits: 5, name: 'rs1', attr: ['base'] },
+ { bits: 12, name: 0x001, attr: ['CBO.CLEAN'] },
+]}
+....
+
+Description::
+
+A *cbo.clean* instruction performs a clean operation on the cache block whose
+effective address is the base address specified in _rs1_. The offset operand may
+be omitted; otherwise, any expression that computes the offset shall evaluate to
+zero. The instruction operates on the set of coherent caches accessed by the
+agent executing the instruction.
+
+Operation::
+[source,sail]
+--
+TODO
+--
+
+[#insns-cbo_flush,reftext="Cache Block Flush"]
+==== cbo.flush
+
+Synopsis::
+Perform a flush operation on a cache block
+
+Mnemonic::
+cbo.flush _offset_(_base_)
+
+Encoding::
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 7, name: 0xF, attr: ['MISC-MEM'] },
+ { bits: 5, name: 0x0 },
+ { bits: 3, name: 0x2, attr: ['CBO'] },
+ { bits: 5, name: 'rs1', attr: ['base'] },
+ { bits: 12, name: 0x002, attr: ['CBO.FLUSH'] },
+]}
+....
+
+Description::
+
+A *cbo.flush* instruction performs a flush operation on the cache block whose
+effective address is the base address specified in _rs1_. The offset operand may
+be omitted; otherwise, any expression that computes the offset shall evaluate to
+zero. The instruction operates on the set of coherent caches accessed by the
+agent executing the instruction.
+
+Operation::
+[source,sail]
+--
+TODO
+--
+
+[#insns-cbo_inval,reftext="Cache Block Invalidate"]
+==== cbo.inval
+
+Synopsis::
+Perform an invalidate operation on a cache block
+
+Mnemonic::
+cbo.inval _offset_(_base_)
+
+Encoding::
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 7, name: 0xF, attr: ['MISC-MEM'] },
+ { bits: 5, name: 0x0 },
+ { bits: 3, name: 0x2, attr: ['CBO'] },
+ { bits: 5, name: 'rs1', attr: ['base'] },
+ { bits: 12, name: 0x000, attr: ['CBO.INVAL'] },
+]}
+....
+
+Description::
+
+A *cbo.inval* instruction performs an invalidate operation on the cache block
+whose effective address is the base address specified in _rs1_. The offset
+operand may be omitted; otherwise, any expression that computes the offset shall
+evaluate to zero. The instruction operates on the set of coherent caches
+accessed by the agent executing the instruction. Depending on CSR programming,
+the instruction may perform a flush operation instead of an invalidate
+operation.
+
+Operation::
+[source,sail]
+--
+TODO
+--
+
+[#insns-cbo_zero,reftext="Cache Block Zero"]
+==== cbo.zero
+
+Synopsis::
+Store zeros to the full set of bytes corresponding to a cache block
+
+Mnemonic::
+cbo.zero _offset_(_base_)
+
+Encoding::
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 7, name: 0xF, attr: ['MISC-MEM'] },
+ { bits: 5, name: 0x0 },
+ { bits: 3, name: 0x2, attr: ['CBO'] },
+ { bits: 5, name: 'rs1', attr: ['base'] },
+ { bits: 12, name: 0x004, attr: ['CBO.ZERO'] },
+]}
+....
+
+Description::
+
+A *cbo.zero* instruction performs stores of zeros to the full set of bytes
+corresponding to the cache block whose effective address is the base address
+specified in _rs1_. The offset operand may be omitted; otherwise, any expression
+that computes the offset shall evaluate to zero. An implementation may or may
+not update the entire set of bytes atomically.
+
+Operation::
+[source,sail]
+--
+TODO
+--
+
+[#insns-prefetch_i,reftext="Cache Block Prefetch for Instruction Fetch"]
+==== prefetch.i
+
+Synopsis::
+Provide a HINT to hardware that a cache block is likely to be accessed by an
+instruction fetch in the near future
+
+Mnemonic::
+prefetch.i _offset_(_base_)
+
+Encoding::
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 7, name: 0x13, attr: ['OP-IMM'] },
+ { bits: 5, name: 0x0, attr: ['offset[4:0]'] },
+ { bits: 3, name: 0x6, attr: ['ORI'] },
+ { bits: 5, name: 'rs1', attr: ['base'] },
+ { bits: 5, name: 0x0, attr: ['PREFETCH.I'] },
+ { bits: 7, name: 'imm[11:5]', attr: ['offset[11:5]'] },
+]}
+....
+
+Description::
+
+A *prefetch.i* instruction indicates to hardware that the cache block whose
+effective address is the sum of the base address specified in _rs1_ and the
+sign-extended offset encoded in _imm[11:0]_, where _imm[4:0]_ equals `0b00000`,
+is likely to be accessed by an instruction fetch in the near future.
+
+[NOTE]
+====
+_An implementation may opt to cache a copy of the cache block in a cache
+accessed by an instruction fetch in order to improve memory access latency, but
+this behavior is not required._
+====
+
+Operation::
+[source,sail]
+--
+TODO
+--
+
+[#insns-prefetch_r,reftext="Cache Block Prefetch for Data Read"]
+==== prefetch.r
+
+Synopsis::
+Provide a HINT to hardware that a cache block is likely to be accessed by a data
+read in the near future
+
+Mnemonic::
+prefetch.r _offset_(_base_)
+
+Encoding::
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 7, name: 0x13, attr: ['OP-IMM'] },
+ { bits: 5, name: 0x0, attr: ['offset[4:0]'] },
+ { bits: 3, name: 0x6, attr: ['ORI'] },
+ { bits: 5, name: 'rs1', attr: ['base'] },
+ { bits: 5, name: 0x1, attr: ['PREFETCH.R'] },
+ { bits: 7, name: 'imm[11:5]', attr: ['offset[11:5]'] },
+]}
+....
+
+Description::
+
+A *prefetch.r* instruction indicates to hardware that the cache block whose
+effective address is the sum of the base address specified in _rs1_ and the
+sign-extended offset encoded in _imm[11:0]_, where _imm[4:0]_ equals `0b00000`,
+is likely to be accessed by a data read (i.e. load) in the near future.
+
+[NOTE]
+====
+_An implementation may opt to cache a copy of the cache block in a cache
+accessed by a data read in order to improve memory access latency, but this
+behavior is not required._
+====
+
+Operation::
+[source,sail]
+--
+TODO
+--
+
+[#insns-prefetch_w,reftext="Cache Block Prefetch for Data Write"]
+==== prefetch.w
+
+Synopsis::
+Provide a HINT to hardware that a cache block is likely to be accessed by a data
+write in the near future
+
+Mnemonic::
+prefetch.w _offset_(_base_)
+
+Encoding::
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 7, name: 0x13, attr: ['OP-IMM'] },
+ { bits: 5, name: 0x0, attr: ['offset[4:0]'] },
+ { bits: 3, name: 0x6, attr: ['ORI'] },
+ { bits: 5, name: 'rs1', attr: ['base'] },
+ { bits: 5, name: 0x3, attr: ['PREFETCH.W'] },
+ { bits: 7, name: 'imm[11:5]', attr: ['offset[11:5]'] },
+]}
+....
+
+Description::
+
+A *prefetch.w* instruction indicates to hardware that the cache block whose
+effective address is the sum of the base address specified in _rs1_ and the
+sign-extended offset encoded in _imm[11:0]_, where _imm[4:0]_ equals `0b00000`,
+is likely to be accessed by a data write (i.e. store) in the near future.
+
+[NOTE]
+====
+_An implementation may opt to cache a copy of the cache block in a cache
+accessed by a data write in order to improve memory access latency, but this
+behavior is not required._
+====
+
+Operation::
+[source,sail]
+--
+TODO
+--
+
diff --git a/src/colophon.adoc b/src/colophon.adoc
index 0e8a16f..efdf097 100644
--- a/src/colophon.adoc
+++ b/src/colophon.adoc
@@ -28,18 +28,14 @@ h|Extension h|Version h|Status
|*D* |*2.2* |*Ratified*
|*Q* |*2.2* |*Ratified*
|*C* |*2.0* |*Ratified*
-|_Counters_ |_2.0_ |_Draft_
-|_L_ |_0.0_ |_Draft_
-|_B_ |_0.0_ |_Draft_
-|_J_ |_0.0_ |_Draft_
-|_T_ |_0.0_ |_Draft_
+|_Counters_ |*2.0* |*Ratified*
|_P_ |_0.2_ |_Draft_
-|_V_ |_1.0_ |_Frozen_
+|*V* |*1.0* |*Ratified*
|*Zicsr* |*2.0* |*Ratified*
|*Zifencei* |*2.0* |*Ratified*
|*Zihintpause* |*2.0* |*Ratified*
|*Zihintntl* |*1.0* |*Ratified*
-|_Zam_ |_0.1_ |_Draft_
+|*Zfa* |*1.0* |*Ratified*
|*Zfh* |*1.0* |*Ratified*
|*Zfhmin* |*1.0* |*Ratified*
|*Zfinx* |*1.0* |*Ratified*
@@ -50,7 +46,12 @@ h|Extension h|Version h|Status
|*Ztso* |*1.0* |*Ratified*
|===
-*_Preface to Document Version 20191213-Base-Ratified_*
+The changes in this version of the document include:
+
+* The draft Zam extension has been removed, in favor of the
+definition of a misaligned atomicity granule PMA.
+
+[.big]*_Preface to Document Version 20191213-Base-Ratified_*
This document describes the RISC-V unprivileged architecture.
@@ -97,7 +98,7 @@ December 2019.
* Moved N extension for user-mode interrupts into Volume II.
* Defined PAUSE hint instruction.
-*_Preface to Document Version 20190608-Base-Ratified_*
+[.big]*_Preface to Document Version 20190608-Base-Ratified_*
This document describes the RISC-V unprivileged architecture.
@@ -175,7 +176,7 @@ data caches. However, it remains the only standard instruction-fetch
coherence mechanism.
* Removed prohibitions on using RV32E with other extensions.
* Removed platform-specific mandates that certain encodings produce
-illegal instruction exceptions in RV32E and RV64I chapters.
+illegal-instruction exceptions in RV32E and RV64I chapters.
* Counter/timer instructions are now not considered part of the
mandatory base ISA, and so CSR instructions were moved into separate
chapter and marked as version 2.0, with the unprivileged counters moved
@@ -202,7 +203,7 @@ group documents.
* Removed text of `V` extension chapter as now superseded by separate
vector extension draft document.
-*_Preface to Document Version 2.2_*
+[.big]*_Preface to Document Version 2.2_*
This is version 2.2 of the document describing the RISC-V user-level
architecture. The document contains the following versions of the RISC-V
@@ -268,7 +269,7 @@ integer registers.
by the RISC-V ELF psABI Specification cite:[riscv-elf-psabi].
* The C extension has been frozen and renumbered version 2.0.
-*_Preface to Document Version 2.1_*
+[.big]*_Preface to Document Version 2.1_*
This is version 2.1 of the document describing the RISC-V user-level
architecture. Note the frozen user-level ISA base and extensions `IMAFDQ`
@@ -300,7 +301,7 @@ supports `MAC` extensions.
description of the RV32E calling convention.
* A revised proposal for the `C` compressed extension, version 1.9 .
-*_Preface to Version 2.0_*
+[.big]*_Preface to Version 2.0_*
This is the second release of the user ISA specification, and we intend
the specification of the base user ISA plus general extensions (i.e.,
diff --git a/src/counters.adoc b/src/counters.adoc
index d559c53..cf646c6 100644
--- a/src/counters.adoc
+++ b/src/counters.adoc
@@ -1,5 +1,5 @@
[[counters]]
-== "Zicntr" and "Zihpm" Counters
+== "Zicntr" and "Zihpm" Counters, Version 2.0
RISC-V ISAs provide a set of up to thirty-two 64-bit performance
counters and timers that are accessible via unprivileged XLEN-bit
@@ -196,7 +196,7 @@ implementations with a richer set of counters.
The implemented number and width of these additional counters, and the
set of events they count, is platform-specific. Accessing an
-unimplemented or ill-configured counter may cause an illegal instruction
+unimplemented or ill-configured counter may cause an illegal-instruction
exception or may return a constant value.
The execution environment should provide a means to determine the number
diff --git a/src/d-st-ext.adoc b/src/d-st-ext.adoc
index 4274017..17629dd 100644
--- a/src/d-st-ext.adoc
+++ b/src/d-st-ext.adoc
@@ -27,7 +27,7 @@ floating-point precisions supported, including H, F, D, and Q.
=== NaN Boxing of Narrower Values
When multiple floating-point precisions are supported, then valid values
-of narrower _n_-bit types, _n<_FLEN, are represented in the lower _n_ bits of an FLEN-bit NaN value, in a process termed NaN-boxing. The upper bits of a valid NaN-boxed value must be all 1s. Valid NaN-boxed _n_-bit values
+of narrower _n_-bit types, _n_<FLEN, are represented in the lower _n_ bits of an FLEN-bit NaN value, in a process termed NaN-boxing. The upper bits of a valid NaN-boxed value must be all 1s. Valid NaN-boxed _n_-bit values
therefore appear as negative quiet NaNs (qNaNs) when viewed as any wider
_m_-bit value, _n_ < _m_ &#8804; FLEN. Any operation that writes a narrower result to an 'f' register must write all 1s to the uppermost FLEN-_n_ bits to yield a legal NaN-boxed value.
(((floating-point, requirements)))
@@ -45,13 +45,13 @@ migration, and debugging.
Floating-point _n_-bit transfer operations move external
values held in IEEE standard formats into and out of the `f` registers,
-and comprise floating-point loads and stores (FL_n_/FS_n_) and floating-point move instructions (FMV._n_.X/FMV.X._n_). A narrower _n_-bit transfer, _n_<FLEN, into the `f` registers will create a valid NaN-boxed value. A narrower
+and comprise floating-point loads and stores (FL__n__/FS__n__) and floating-point move instructions (FMV._n_.X/FMV.X._n_). A narrower _n_-bit transfer, _n_<FLEN, into the `f` registers will create a valid NaN-boxed value. A narrower
_n_-bit transfer out of the floating-point registers will
transfer the lower _n_ bits of the register ignoring the
upper FLEN-_n_ bits.
Apart from transfer operations described in the previous paragraph, all
-other floating-point operations on narrower latexmath:[$n$]-bit
+other floating-point operations on narrower __n__-bit
operations, _n_<FLEN, check if the input operands are
correctly NaN-boxed, i.e., all upper FLEN-_n_ bits are 1. If
so, the _n_ least-significant bits of the input are used as
diff --git a/src/example/memcpy.s b/src/example/memcpy.s
new file mode 100644
index 0000000..5f6318a
--- /dev/null
+++ b/src/example/memcpy.s
@@ -0,0 +1,17 @@
+ .text
+ .balign 4
+ .global memcpy
+ # void *memcpy(void* dest, const void* src, size_t n)
+ # a0=dest, a1=src, a2=n
+ #
+ memcpy:
+ # Strip-mined byte copy: each pass moves vl bytes, with vl chosen by vsetvli.
+ mv a3, a0 # Copy destination into a3 so a0 is left untouched as the return value
+ loop:
+ vsetvli t0, a2, e8, m8, ta, ma # t0 = vl for this pass: 8-bit elements, LMUL=8, AVL = remaining count in a2
+ vle8.v v0, (a1) # Load vl bytes from src
+ add a1, a1, t0 # Bump src pointer by the bytes just loaded
+ sub a2, a2, t0 # Decrement remaining byte count
+ vse8.v v0, (a3) # Store vl bytes to dest
+ add a3, a3, t0 # Bump dest pointer
+ bnez a2, loop # Any more?
+ ret # Return (a0 still holds dest)
diff --git a/src/example/saxpy.s b/src/example/saxpy.s
new file mode 100644
index 0000000..de7f224
--- /dev/null
+++ b/src/example/saxpy.s
@@ -0,0 +1,29 @@
+ .text
+ .balign 4
+ .global saxpy
+# void
+# saxpy(size_t n, const float a, const float *x, float *y)
+# {
+# size_t i;
+# for (i=0; i<n; i++)
+# y[i] = a * x[i] + y[i];
+# }
+#
+# register arguments:
+# a0 n
+# fa0 a
+# a1 x
+# a2 y
+
+saxpy:
+ # Strip-mined loop: each pass handles vl elements and branches back to the
+ # function label until the element count in a0 reaches zero.
+ vsetvli a4, a0, e32, m8, ta, ma # a4 = vl: 32-bit elements, LMUL=8, AVL = remaining n
+ vle32.v v0, (a1) # Load vl elements of x
+ sub a0, a0, a4 # Decrement remaining element count
+ slli a4, a4, 2 # Convert element count to byte count (4 bytes per element)
+ add a1, a1, a4 # Bump x pointer
+ vle32.v v8, (a2) # Load vl elements of y
+ vfmacc.vf v8, fa0, v0 # v8 = fa0 * v0 + v8, i.e. y = a*x + y
+ vse32.v v8, (a2) # Store updated y elements
+ add a2, a2, a4 # Bump y pointer
+ bnez a0, saxpy # Loop while elements remain
+ ret
diff --git a/src/example/sgemm.S b/src/example/sgemm.S
new file mode 100644
index 0000000..e29cc8d
--- /dev/null
+++ b/src/example/sgemm.S
@@ -0,0 +1,221 @@
+ .text
+ .balign 4
+ .global sgemm_nn
+# RV64IDV system
+#
+# void
+# sgemm_nn(size_t n,
+# size_t m,
+# size_t k,
+# const float*a, // m * k matrix
+# size_t lda,
+# const float*b, // k * n matrix
+# size_t ldb,
+# float*c, // m * n matrix
+# size_t ldc)
+#
+# c += a*b (alpha=1, no transpose on input matrices)
+# matrices stored in C row-major order
+
+#define n a0
+#define m a1
+#define k a2
+#define ap a3
+#define astride a4
+#define bp a5
+#define bstride a6
+#define cp a7
+#define cstride t0
+#define kt t1
+#define nt t2
+#define bnp t3
+#define cnp t4
+#define akp t5
+#define bkp s0
+#define nvl s1
+#define ccp s2
+#define amp s3
+
+# Use args as additional temporaries
+#define ft12 fa0
+#define ft13 fa1
+#define ft14 fa2
+#define ft15 fa3
+
+# This version holds a 16*VLMAX block of C matrix in vector registers
+# in inner loop, but otherwise does no cache or TLB tiling.
+
+sgemm_nn:
+ # Computes c += a*b on blocks of 16 rows by vl columns of C, with the C block
+ # held in v0-v15 and the current B row vector in v16.
+ #
+ # Prologue: save every callee-saved register this routine writes.
+ # bkp=s0, nvl=s1, ccp=s2 and amp=s3 are all modified below, so s3 must be
+ # saved and restored along with s0-s2.
+ # NOTE(review): FRAMESIZE and OFFSET are not defined in this file; presumably
+ # placeholders. Each save slot needs its own distinct offset.
+ addi sp, sp, -FRAMESIZE
+ sd s0, OFFSET(sp)
+ sd s1, OFFSET(sp)
+ sd s2, OFFSET(sp)
+ sd s3, OFFSET(sp)
+
+ # Check for zero size matrices
+ beqz n, exit
+ beqz m, exit
+ beqz k, exit
+
+ # Convert elements strides to byte strides.
+ ld cstride, OFFSET(sp) # Get arg from stack frame
+ slli astride, astride, 2
+ slli bstride, bstride, 2
+ slli cstride, cstride, 2
+
+ slti t6, m, 16 # Fewer than 16 rows: skip the 16-row blocked loop
+ bnez t6, end_rows
+
+c_row_loop: # Loop across rows of C blocks
+
+ mv nt, n # Initialize n counter for next row of C blocks
+
+ mv bnp, bp # Initialize B n-loop pointer to start
+ mv cnp, cp # Initialize C n-loop pointer
+
+c_col_loop: # Loop across one row of C blocks
+ vsetvli nvl, nt, e32, ta, ma # 32-bit vectors, LMUL=1
+
+ mv akp, ap # reset pointer into A to beginning
+ mv bkp, bnp # step to next column in B matrix
+
+ # Initialize current C submatrix block from memory.
+ vle32.v v0, (cnp); add ccp, cnp, cstride;
+ vle32.v v1, (ccp); add ccp, ccp, cstride;
+ vle32.v v2, (ccp); add ccp, ccp, cstride;
+ vle32.v v3, (ccp); add ccp, ccp, cstride;
+ vle32.v v4, (ccp); add ccp, ccp, cstride;
+ vle32.v v5, (ccp); add ccp, ccp, cstride;
+ vle32.v v6, (ccp); add ccp, ccp, cstride;
+ vle32.v v7, (ccp); add ccp, ccp, cstride;
+ vle32.v v8, (ccp); add ccp, ccp, cstride;
+ vle32.v v9, (ccp); add ccp, ccp, cstride;
+ vle32.v v10, (ccp); add ccp, ccp, cstride;
+ vle32.v v11, (ccp); add ccp, ccp, cstride;
+ vle32.v v12, (ccp); add ccp, ccp, cstride;
+ vle32.v v13, (ccp); add ccp, ccp, cstride;
+ vle32.v v14, (ccp); add ccp, ccp, cstride;
+ vle32.v v15, (ccp)
+
+
+ mv kt, k # Initialize inner loop counter
+
+ # Inner loop scheduled assuming 4-clock occupancy of vfmacc instruction and single-issue pipeline
+ # Software pipeline loads
+ flw ft0, (akp); add amp, akp, astride;
+ flw ft1, (amp); add amp, amp, astride;
+ flw ft2, (amp); add amp, amp, astride;
+ flw ft3, (amp); add amp, amp, astride;
+ # Get vector from B matrix
+ vle32.v v16, (bkp)
+
+ # Loop on inner dimension for current C block
+ k_loop:
+ vfmacc.vf v0, ft0, v16
+ add bkp, bkp, bstride
+ flw ft4, (amp)
+ add amp, amp, astride
+ vfmacc.vf v1, ft1, v16
+ addi kt, kt, -1 # Decrement k counter
+ flw ft5, (amp)
+ add amp, amp, astride
+ vfmacc.vf v2, ft2, v16
+ flw ft6, (amp)
+ add amp, amp, astride
+ flw ft7, (amp)
+ vfmacc.vf v3, ft3, v16
+ add amp, amp, astride
+ flw ft8, (amp)
+ add amp, amp, astride
+ vfmacc.vf v4, ft4, v16
+ flw ft9, (amp)
+ add amp, amp, astride
+ vfmacc.vf v5, ft5, v16
+ flw ft10, (amp)
+ add amp, amp, astride
+ vfmacc.vf v6, ft6, v16
+ flw ft11, (amp)
+ add amp, amp, astride
+ vfmacc.vf v7, ft7, v16
+ flw ft12, (amp)
+ add amp, amp, astride
+ vfmacc.vf v8, ft8, v16
+ flw ft13, (amp)
+ add amp, amp, astride
+ vfmacc.vf v9, ft9, v16
+ flw ft14, (amp)
+ add amp, amp, astride
+ vfmacc.vf v10, ft10, v16
+ flw ft15, (amp)
+ add amp, amp, astride
+ addi akp, akp, 4 # Move to next column of a
+ vfmacc.vf v11, ft11, v16
+ beqz kt, 1f # Don't load past end of matrix
+ flw ft0, (akp)
+ add amp, akp, astride
+1: vfmacc.vf v12, ft12, v16
+ beqz kt, 1f
+ flw ft1, (amp)
+ add amp, amp, astride
+1: vfmacc.vf v13, ft13, v16
+ beqz kt, 1f
+ flw ft2, (amp)
+ add amp, amp, astride
+1: vfmacc.vf v14, ft14, v16
+ beqz kt, 1f # Exit out of loop
+ flw ft3, (amp)
+ add amp, amp, astride
+ vfmacc.vf v15, ft15, v16
+ vle32.v v16, (bkp) # Get next vector from B matrix, overlap loads with jump stalls
+ j k_loop
+
+1: vfmacc.vf v15, ft15, v16
+
+ # Save C matrix block back to memory
+ vse32.v v0, (cnp); add ccp, cnp, cstride;
+ vse32.v v1, (ccp); add ccp, ccp, cstride;
+ vse32.v v2, (ccp); add ccp, ccp, cstride;
+ vse32.v v3, (ccp); add ccp, ccp, cstride;
+ vse32.v v4, (ccp); add ccp, ccp, cstride;
+ vse32.v v5, (ccp); add ccp, ccp, cstride;
+ vse32.v v6, (ccp); add ccp, ccp, cstride;
+ vse32.v v7, (ccp); add ccp, ccp, cstride;
+ vse32.v v8, (ccp); add ccp, ccp, cstride;
+ vse32.v v9, (ccp); add ccp, ccp, cstride;
+ vse32.v v10, (ccp); add ccp, ccp, cstride;
+ vse32.v v11, (ccp); add ccp, ccp, cstride;
+ vse32.v v12, (ccp); add ccp, ccp, cstride;
+ vse32.v v13, (ccp); add ccp, ccp, cstride;
+ vse32.v v14, (ccp); add ccp, ccp, cstride;
+ vse32.v v15, (ccp)
+
+ # Following tail instructions should be scheduled earlier in free slots during C block save.
+ # Leaving here for clarity.
+
+ # Bump pointers for loop across blocks in one row
+ slli t6, nvl, 2
+ add cnp, cnp, t6 # Move C block pointer over
+ add bnp, bnp, t6 # Move B block pointer over
+ sub nt, nt, nvl # Decrement element count in n dimension
+ bnez nt, c_col_loop # Any more to do?
+
+ # Move to next set of rows
+ addi m, m, -16 # Did 16 rows above
+ slli t6, astride, 4 # Multiply astride by 16
+ add ap, ap, t6 # Move A matrix pointer down 16 rows
+ slli t6, cstride, 4 # Multiply cstride by 16
+ add cp, cp, t6 # Move C matrix pointer down 16 rows
+
+ slti t6, m, 16
+ beqz t6, c_row_loop
+
+ # Handle end of matrix with fewer than 16 rows.
+ # Can use smaller versions of above decreasing in powers-of-2 depending on code-size concerns.
+end_rows:
+ # Not done.
+
+exit:
+ # Epilogue: restore the same callee-saved registers saved in the prologue.
+ ld s0, OFFSET(sp)
+ ld s1, OFFSET(sp)
+ ld s2, OFFSET(sp)
+ ld s3, OFFSET(sp)
+ addi sp, sp, FRAMESIZE
+ ret
diff --git a/src/example/strcmp.s b/src/example/strcmp.s
new file mode 100644
index 0000000..c657703
--- /dev/null
+++ b/src/example/strcmp.s
@@ -0,0 +1,34 @@
+ .text
+ .balign 4
+ .global strcmp
+ # int strcmp(const char *src1, const char* src2)
+strcmp:
+ # Compares the two strings a vector of bytes at a time, stopping at the first
+ # position where src1 holds a zero byte or the two strings differ.
+ ## Using LMUL=2, but same register names work for larger LMULs
+ li t1, 0 # Initial pointer bump
+loop:
+ vsetvli t0, x0, e8, m2, ta, ma # Max length vectors of bytes
+ add a0, a0, t1 # Bump src1 pointer
+ vle8ff.v v8, (a0) # Get src1 bytes
+ add a1, a1, t1 # Bump src2 pointer
+ vle8ff.v v16, (a1) # Get src2 bytes
+
+ vmseq.vi v0, v8, 0 # Flag zero bytes in src1
+ vmsne.vv v1, v8, v16 # Flag if src1 != src2
+ vmor.mm v0, v0, v1 # Combine exit conditions
+
+ vfirst.m a2, v0 # a2 = index of first flagged byte (==0 or !=), negative if none
+ csrr t1, vl # Get number of bytes fetched; used as the pointer bump on the next pass
+
+ bltz a2, loop # Loop if all same and no zero byte
+
+ add a0, a0, a2 # Get src1 element address
+ lbu a3, (a0) # Get src1 byte from memory
+
+ add a1, a1, a2 # Get src2 element address
+ lbu a4, (a1) # Get src2 byte from memory
+
+ sub a0, a3, a4 # Return value: src1 byte minus src2 byte at the stopping index
+
+ ret
+
+
diff --git a/src/example/strcpy.s b/src/example/strcpy.s
new file mode 100644
index 0000000..109112d
--- /dev/null
+++ b/src/example/strcpy.s
@@ -0,0 +1,20 @@
+ .text
+ .balign 4
+ .global strcpy
+ # char* strcpy(char *dst, const char* src)
+strcpy:
+ # Copies src to dst a vector of bytes at a time, up to and including the
+ # terminating zero byte. a0 is never written, so dst is returned unchanged.
+ mv a2, a0 # Copy dst into a2 as the running store pointer
+ li t0, -1 # Infinite AVL
+loop:
+ vsetvli x0, t0, e8, m8, ta, ma # Max length vectors of bytes
+ vle8ff.v v8, (a1) # Get src bytes
+ csrr t1, vl # Get number of bytes fetched
+ vmseq.vi v1, v8, 0 # Flag zero bytes
+ vfirst.m a3, v1 # Zero found? (a3 negative if not)
+ add a1, a1, t1 # Bump src pointer
+ vmsif.m v0, v1 # Set mask up to and including zero byte.
+ vse8.v v8, (a2), v0.t # Write out bytes under the mask
+ add a2, a2, t1 # Bump dst pointer
+ bltz a3, loop # Zero byte not found, so loop
+
+ ret
diff --git a/src/example/strlen.s b/src/example/strlen.s
new file mode 100644
index 0000000..1c3af4b
--- /dev/null
+++ b/src/example/strlen.s
@@ -0,0 +1,22 @@
+ .text
+ .balign 4
+ .global strlen
+# size_t strlen(const char *str)
+# a0 holds *str
+
+strlen:
+ # Scans the string a vector of bytes at a time until a zero byte is found,
+ # then returns (address of the zero byte) - (start address) in a0.
+ mv a3, a0 # Save start
+loop:
+ vsetvli a1, x0, e8, m8, ta, ma # Vector of bytes of maximum length
+ vle8ff.v v8, (a3) # Load bytes
+ csrr a1, vl # Get bytes read
+ vmseq.vi v0, v8, 0 # Set v0[i] where v8[i] = 0
+ vfirst.m a2, v0 # Find first set bit (negative if none)
+ add a3, a3, a1 # Bump pointer
+ bltz a2, loop # Not found?
+
+ # a3 was already bumped past the final chunk, so the same bump (a1) is added
+ # to the start address; the two cancel in the subtraction below.
+ add a0, a0, a1 # Sum start + bump
+ add a3, a3, a2 # Add index
+ sub a0, a3, a0 # Subtract start address+bump
+
+ ret
diff --git a/src/example/strncpy.s b/src/example/strncpy.s
new file mode 100644
index 0000000..87e5410
--- /dev/null
+++ b/src/example/strncpy.s
@@ -0,0 +1,36 @@
+ .text
+ .balign 4
+ .global strncpy
+ # char* strncpy(char *dst, const char* src, size_t n)
+strncpy:
+ # Copies non-zero bytes from src to dst, at most n in total. If a zero byte
+ # is found first, the rest of the n-byte destination is filled with zeros.
+ # a0 is never written, so dst is returned unchanged.
+ mv a3, a0 # Copy dst into a3 as the running store pointer
+loop:
+ vsetvli x0, a2, e8, m8, ta, ma # Vectors of bytes, AVL = remaining count
+ vle8ff.v v8, (a1) # Get src bytes
+ vmseq.vi v1, v8, 0 # Flag zero bytes
+ csrr t1, vl # Get number of bytes fetched
+ vfirst.m a4, v1 # Zero found? (a4 = its index, negative if not)
+ vmsbf.m v0, v1 # Set mask up to before zero byte.
+ vse8.v v8, (a3), v0.t # Write out non-zero bytes
+ bgez a4, zero_tail # Zero remaining bytes.
+ sub a2, a2, t1 # Decrement count.
+ add a3, a3, t1 # Bump dest pointer
+ add a1, a1, t1 # Bump src pointer
+ bnez a2, loop # Anymore?
+
+ ret
+
+zero_tail:
+ sub a2, a2, a4 # Subtract count on non-zero bytes.
+ add a3, a3, a4 # Advance past non-zero bytes.
+ vsetvli t1, a2, e8, m8, ta, ma # Vectors of bytes, AVL = bytes left to zero
+ vmv.v.i v0, 0 # Splat zero.
+
+zero_loop:
+ vse8.v v0, (a3) # Store zero.
+ sub a2, a2, t1 # Decrement count.
+ add a3, a3, t1 # Bump pointer
+ vsetvli t1, a2, e8, m8, ta, ma # Set vl for the next pass from the remaining count
+ bnez a2, zero_loop # Anymore?
+
+ ret
diff --git a/src/example/vvaddint32.s b/src/example/vvaddint32.s
new file mode 100644
index 0000000..22305d9
--- /dev/null
+++ b/src/example/vvaddint32.s
@@ -0,0 +1,22 @@
+ .text
+ .balign 4
+ .global vvaddint32
+ # vector-vector add routine of 32-bit integers
+ # void vvaddint32(size_t n, const int*x, const int*y, int*z)
+ # { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } }
+ #
+ # a0 = n, a1 = x, a2 = y, a3 = z
+ # Non-vector instructions are indented
+vvaddint32:
+ # Strip-mined loop: each pass handles vl elements and branches back to the
+ # function label until the element count in a0 reaches zero.
+ vsetvli t0, a0, e32, ta, ma # Set vector length based on 32-bit vectors; t0 = elements this pass
+ vle32.v v0, (a1) # Get first vector
+ sub a0, a0, t0 # Decrement number done
+ slli t0, t0, 2 # Multiply number done by 4 bytes
+ add a1, a1, t0 # Bump x pointer
+ vle32.v v1, (a2) # Get second vector
+ add a2, a2, t0 # Bump y pointer
+ vadd.vv v2, v0, v1 # Sum vectors
+ vse32.v v2, (a3) # Store result
+ add a3, a3, t0 # Bump z pointer
+ bnez a0, vvaddint32 # Loop back
+ ret # Finished
diff --git a/src/extending.adoc b/src/extending.adoc
index 4ea00c0..9124a26 100644
--- a/src/extending.adoc
+++ b/src/extending.adoc
@@ -25,7 +25,7 @@ extensions.
==== Standard versus Non-Standard Extension
Any RISC-V processor implementation must support a base integer ISA
-(RV32I, RV32E, RV64I, or RV128I). In addition, an implementation may
+(RV32I, RV32E, RV64I, RV64E, or RV128I). In addition, an implementation may
support one or more extensions. We divide extensions into two broad
categories: _standard_ versus _non-standard_.
diff --git a/src/f-st-ext.adoc b/src/f-st-ext.adoc
index 13db568..24941ed 100644
--- a/src/f-st-ext.adoc
+++ b/src/f-st-ext.adoc
@@ -37,7 +37,7 @@ floating-point register file state can reduce context-switch overhead.
[[fprs]]
.RISC-V standard F extension single-precision floating-point state
-[col[s="<|^|>"|option[s="header",width="50%",align="center"grid="rows"]
+[cols="<,^,>",options="header",width="50%",align="center",grid="rows"]
|===
| [.small]#FLEN-1#| >| [.small]#0#
3+^| [.small]#f0#
@@ -143,11 +143,11 @@ the dynamic rounding mode CSR state will serialize the pipeline. Static
rounding modes are used to implement specialized arithmetic operations
that often have to switch frequently between different rounding modes.
-The ratified version of the F spec mandated that an illegal instruction
+The ratified version of the F spec mandated that an illegal-instruction
exception was raised when an instruction was executed with a reserved
dynamic rounding mode. This has been weakened to reserved, which matches
-the behavior of static rounding-mode instructions. Raising an illegal
-instruction exception is still valid behavior when encountering a
+the behavior of static rounding-mode instructions. Raising an
+illegal-instruction exception is still valid behavior when encountering a
reserved encoding, so implementations compatible with the ratified spec
are compatible with the weakened spec.
====
@@ -157,7 +157,7 @@ arisen on any floating-point arithmetic instruction since the field was
last reset by software, as shown in <<bitdef>>. The base
RISC-V ISA does not support generating a trap on the setting of a
floating-point exception flag.
-(((floating-point, excpetion flag)))
+(((floating-point, exception flag)))
[[bitdef]]
.Accrued exception flag encoding.
@@ -231,7 +231,7 @@ signals.
Floating-point loads and stores use the same base+offset addressing mode as the integer base ISAs, with a base address in register _rs1_ and a 12-bit signed byte offset. The FLW instruction loads a single-precision floating-point value from memory into floating-point register _rd_. FSW stores a single-precision value from floating-point register _rs2_ to memory.
-include::images/wavedrom/sp-load-store.adoc[]
+include::images/wavedrom/sp-load-store-2.adoc[]
[[sp-ldst]]
//.SP load and store
diff --git a/src/fraclmul.adoc b/src/fraclmul.adoc
new file mode 100644
index 0000000..6f12f58
--- /dev/null
+++ b/src/fraclmul.adoc
@@ -0,0 +1,174 @@
+=== Fractional LMUL Example
+
+This appendix presents a non-normative example to help explain where
+compilers can make good use of the fractional LMUL feature.
+
+Consider the following (admittedly contrived) loop written in C:
+
+----
+void add_ref(long N,
+ signed char *restrict c_c, signed char *restrict c_a, signed char *restrict c_b,
+ long *restrict l_c, long *restrict l_a, long *restrict l_b,
+ long *restrict l_d, long *restrict l_e, long *restrict l_f,
+ long *restrict l_g, long *restrict l_h, long *restrict l_i,
+ long *restrict l_j, long *restrict l_k, long *restrict l_l,
+ long *restrict l_m) {
+ long i;
+ for (i = 0; i < N; i++) {
+ c_c[i] = c_a[i] + c_b[i]; // Note this 'char' addition that creates a mixed type situation
+ l_c[i] = l_a[i] + l_b[i];
+ l_f[i] = l_d[i] + l_e[i];
+ l_i[i] = l_g[i] + l_h[i];
+ l_l[i] = l_k[i] + l_j[i];
+ l_m[i] += l_m[i] + l_c[i] + l_f[i] + l_i[i] + l_l[i];
+ }
+}
+----
+
+The example loop has high register pressure due to the many input variables
+and temporaries required. The compiler realizes there are two datatypes within
+the loop: an 8-bit 'char' and a 64-bit 'long'. Without fractional LMUL, the
+compiler would be forced to use LMUL=1 for the 8-bit computation and LMUL=8 for
+the 64-bit computation(s), to have an equal number of elements on all computations
+within the same loop iteration. Under LMUL=8, only 4 vector register groups are
+available to the register allocator. Given the large number of 64-bit variables and
+temporaries required in this loop, the compiler ends up generating a lot of
+spill code. The code below demonstrates this effect:
+
+----
+.LBB0_4: # %vector.body
+ # =>This Inner Loop Header: Depth=1
+ add s9, a2, s6
+ vsetvli s1, zero, e8,m1,ta,mu
+ vle8.v v25, (s9)
+ add s1, a3, s6
+ vle8.v v26, (s1)
+ vadd.vv v25, v26, v25
+ add s1, a1, s6
+ vse8.v v25, (s1)
+ add s9, a5, s10
+ vsetvli s1, zero, e64,m8,ta,mu
+ vle64.v v8, (s9)
+ add s1, a6, s10
+ vle64.v v16, (s1)
+ add s1, a7, s10
+ vle64.v v24, (s1)
+ add s1, s3, s10
+ vle64.v v0, (s1)
+ sd a0, -112(s0)
+ ld a0, -128(s0)
+ vs8r.v v0, (a0) # Spill LMUL=8
+ add s9, t6, s10
+ add s11, t5, s10
+ add ra, t2, s10
+ add s1, t3, s10
+ vle64.v v0, (s9)
+ ld s9, -136(s0)
+ vs8r.v v0, (s9) # Spill LMUL=8
+ vle64.v v0, (s11)
+ ld s9, -144(s0)
+ vs8r.v v0, (s9) # Spill LMUL=8
+ vle64.v v0, (ra)
+ ld s9, -160(s0)
+ vs8r.v v0, (s9) # Spill LMUL=8
+ vle64.v v0, (s1)
+ ld s1, -152(s0)
+ vs8r.v v0, (s1) # Spill LMUL=8
+ vadd.vv v16, v16, v8
+ ld s1, -128(s0)
+ vl8r.v v8, (s1) # Reload LMUL=8
+ vadd.vv v8, v8, v24
+ ld s1, -136(s0)
+ vl8r.v v24, (s1) # Reload LMUL=8
+ ld s1, -144(s0)
+ vl8r.v v0, (s1) # Reload LMUL=8
+ vadd.vv v24, v0, v24
+ ld s1, -128(s0)
+ vs8r.v v24, (s1) # Spill LMUL=8
+ ld s1, -152(s0)
+ vl8r.v v0, (s1) # Reload LMUL=8
+ ld s1, -160(s0)
+ vl8r.v v24, (s1) # Reload LMUL=8
+ vadd.vv v0, v0, v24
+ add s1, a4, s10
+ vse64.v v16, (s1)
+ add s1, s2, s10
+ vse64.v v8, (s1)
+ vadd.vv v8, v8, v16
+ add s1, t4, s10
+ ld s9, -128(s0)
+ vl8r.v v16, (s9) # Reload LMUL=8
+ vse64.v v16, (s1)
+ add s9, t0, s10
+ vadd.vv v8, v8, v16
+ vle64.v v16, (s9)
+ add s1, t1, s10
+ vse64.v v0, (s1)
+ vadd.vv v8, v8, v0
+ vsll.vi v16, v16, 1
+ vadd.vv v8, v8, v16
+ vse64.v v8, (s9)
+ add s6, s6, s7
+ add s10, s10, s8
+ bne s6, s4, .LBB0_4
+----
+
+If, instead of using LMUL=1 for the 8-bit computation, the compiler is allowed
+to use a fractional LMUL=1/2, then the 64-bit computations can be performed
+using LMUL=4 (note that the same ratio of 64-bit elements and 8-bit elements is
+preserved as in the previous example). Now the compiler has 8 available
+vector register groups to perform register allocation, resulting in no spill
+code, as shown in the loop below:
+
+----
+.LBB0_4: # %vector.body
+ # =>This Inner Loop Header: Depth=1
+ add s9, a2, s6
+ vsetvli s1, zero, e8,mf2,ta,mu // LMUL=1/2 !
+ vle8.v v25, (s9)
+ add s1, a3, s6
+ vle8.v v26, (s1)
+ vadd.vv v25, v26, v25
+ add s1, a1, s6
+ vse8.v v25, (s1)
+ add s9, a5, s10
+ vsetvli s1, zero, e64,m4,ta,mu // LMUL=4
+ vle64.v v28, (s9)
+ add s1, a6, s10
+ vle64.v v8, (s1)
+ vadd.vv v28, v8, v28
+ add s1, a7, s10
+ vle64.v v8, (s1)
+ add s1, s3, s10
+ vle64.v v12, (s1)
+ add s1, t6, s10
+ vle64.v v16, (s1)
+ add s1, t5, s10
+ vle64.v v20, (s1)
+ add s1, a4, s10
+ vse64.v v28, (s1)
+ vadd.vv v8, v12, v8
+ vadd.vv v12, v20, v16
+ add s1, t2, s10
+ vle64.v v16, (s1)
+ add s1, t3, s10
+ vle64.v v20, (s1)
+ add s1, s2, s10
+ vse64.v v8, (s1)
+ add s9, t4, s10
+ vadd.vv v16, v20, v16
+ add s11, t0, s10
+ vle64.v v20, (s11)
+ vse64.v v12, (s9)
+ add s1, t1, s10
+ vse64.v v16, (s1)
+ vsll.vi v20, v20, 1
+ vadd.vv v28, v8, v28
+ vadd.vv v28, v28, v12
+ vadd.vv v28, v28, v16
+ vadd.vv v28, v28, v20
+ vse64.v v28, (s11)
+ add s6, s6, s7
+ add s10, s10, s8
+ bne s6, s4, .LBB0_4
+----
diff --git a/src/hypervisor.adoc b/src/hypervisor.adoc
index f97863d..e4775b5 100644
--- a/src/hypervisor.adoc
+++ b/src/hypervisor.adoc
@@ -21,7 +21,7 @@ same SBI as an OS normally does from S-mode. An HS-mode hypervisor is
expected to implement the SBI for its VS-mode guest.
The hypervisor extension depends on an "I" base integer ISA with 32
-`x` registers (RV32I or RV64I), not RV32E, which has only 16 `x`
+`x` registers (RV32I or RV64I), not RV32E or RV64E, which have only 16 `x`
registers. CSR `mtval` must not be read-only zero, and standard
page-based address translation must be supported, either Sv32 for RV32,
or a minimum of Sv39 for RV64.
@@ -62,14 +62,15 @@ possible privilege modes of a RISC-V hart with the hypervisor extension.
[[HPrivModes]]
.Privilege modes with the hypervisor extension.
-[float="center",align="center",cols="^,^,<,3,<"]
+[float="center",align="center",cols="~,~,~,~,~"]
|===
-|Virtualization Mode (V) |Nominal Privilege |Abbreviation |Name |Two-Stage Translation
+^|Virtualization +
+Mode (V) ^|Nominal Privilege |Abbreviation |Name |Two-Stage Translation
-|0 +
+^|0 +
0 +
0
-| U +
+^| U +
S +
M
|U-mode +
@@ -81,9 +82,9 @@ Machine mode
|Off +
Off +
Off
-|1 +
+^|1 +
1
-|U +
+^|U +
S
|VU-mode +
VS-mode
@@ -126,8 +127,8 @@ taking over all functions of the usual supervisor CSRs except as
specified otherwise. Instructions that normally read or modify a
supervisor CSR shall instead access the corresponding VS CSR. When V=1,
an attempt to read or write a VS CSR directly by its own separate CSR
-address causes a virtual instruction exception. (Attempts from U-mode
-cause an illegal instruction exception as usual.) The VS CSRs can be
+address causes a virtual-instruction exception. (Attempts from U-mode
+cause an illegal-instruction exception as usual.) The VS CSRs can be
accessed as themselves only from M-mode or HS-mode.
While V=1, the normal HS-level supervisor CSRs that are replaced by VS
@@ -188,17 +189,17 @@ widest supported width not wider than the new HSXLEN.
The `hstatus` fields VTSR, VTW, and VTVM are defined analogously to the
`mstatus` fields TSR, TW, and TVM, but affect execution only in VS-mode,
-and cause virtual instruction exceptions instead of illegal instruction
+and cause virtual-instruction exceptions instead of illegal-instruction
exceptions. When VTSR=1, an attempt in VS-mode to execute SRET raises a
-virtual instruction exception. When VTW=1 (and assuming `mstatus`.TW=0),
-an attempt in VS-mode to execute WFI raises a virtual instruction
+virtual-instruction exception. When VTW=1 (and assuming `mstatus`.TW=0),
+an attempt in VS-mode to execute WFI raises a virtual-instruction
exception if the WFI does not complete within an
implementation-specific, bounded time limit. An implementation may have
-WFI always raise a virtual instruction exception in VS-mode when VTW=1
+WFI always raise a virtual-instruction exception in VS-mode when VTW=1
(and `mstatus`.TW=0), even if there are pending globally-disabled
interrupts when the instruction is executed. When VTVM=1, an attempt in
VS-mode to execute SFENCE.VMA or SINVAL.VMA or to access CSR `satp`
-raises a virtual instruction exception.
+raises a virtual-instruction exception.
The VGEIN (Virtual Guest External Interrupt Number) field selects a
guest external interrupt source for VS-level external interrupts. VGEIN
@@ -213,8 +214,8 @@ further in <<hinterruptregs>>.
Field HU (Hypervisor in U-mode) controls whether the virtual-machine
load/store instructions, HLV, HLVX, and HSV, can be used also in U-mode.
When HU=1, these instructions can be executed in U-mode the same as in
-HS-mode. When HU=0, all hypervisor instructions cause an illegal
-instruction trap in U-mode.
+HS-mode. When HU=0, all hypervisor instructions cause an
+illegal-instruction exception in U-mode.
[NOTE]
====
@@ -271,9 +272,11 @@ same endianness as HS-mode.
==== Hypervisor Trap Delegation Registers (`hedeleg` and `hideleg`)
-Registers `hedeleg` and `hideleg` are HSXLEN-bit read/write registers,
-formatted as shown in <<hedelegreg>> and
-<<hidelegreg>> respectively. By default, all traps at
+Register `hedeleg` is a 64-bit read/write register, formatted as shown in
+<<hedelegreg>>.
+Register `hideleg` is an HSXLEN-bit read/write register, formatted as shown in
+<<hidelegreg>>.
+By default, all traps at
any privilege level are handled in M-mode, though M-mode usually uses
the `medeleg` and `mideleg` CSRs to delegate some traps to HS-mode. The
`hedeleg` and `hideleg` CSRs allow these traps to be further delegated
@@ -302,6 +305,10 @@ Requiring that certain bits of `hedeleg` be writable reduces some of the
burden on a hypervisor to handle variations of implementation.
====
+When XLEN=32, `hedelegh` is a 32-bit read/write register
+that aliases bits 63:32 of `hedeleg`.
+Register `hedelegh` does not exist when XLEN=64.
+
An interrupt that has been delegated to HS-mode (using `mideleg`) is
further delegated to VS-mode if the corresponding `hideleg` bit is set.
Among bits 15:0 of `hideleg`, bits 10, 6, and 2 (corresponding to the
@@ -315,7 +322,7 @@ external interrupt (code 9) for VS-mode, including the value written to
interrupt (6) is translated into a supervisor timer interrupt (5) for
VS-mode, and a virtual supervisor software interrupt (2) is translated
into a supervisor software interrupt (1) for VS-mode. Similar
-translations may or may not be done for platform or custom interrupt
+translations may or may not be done for platform interrupt
causes (codes 16 and above).
[[hedeleg-bits]]
@@ -338,6 +345,8 @@ causes (codes 16 and above).
12 +
13 +
15 +
+18 +
+19 +
20 +
21 +
22 +
@@ -357,6 +366,8 @@ Read-only 0 +
Writable +
Writable +
Writable +
+Writable +
+Writable +
Read-only 0 +
Read-only 0 +
Read-only 0 +
@@ -376,6 +387,8 @@ Environment call from M-mode +
Instruction page fault +
Load page fault +
Store/AMO page fault +
+Software check +
+Hardware error +
Instruction guest-page fault +
Load guest-page fault +
Virtual instruction +
@@ -557,6 +570,7 @@ cause a supervisor-level (HS-level) guest external interrupt. The enable
bits in `hgeie` do not affect the VS-level external interrupt signal
selected from `hgeip` by `hstatus`.VGEIN.
+[[sec:henvcfg]]
==== Hypervisor Environment Configuration Register (`henvcfg`)
The `henvcfg` CSR is a 64-bit read/write register, formatted
@@ -600,6 +614,15 @@ VS-stage address translation. When PBMTE=0, the implementation behaves
as though Svpbmt were not implemented for VS-stage address translation.
If Svpbmt is not implemented, PBMTE is read-only zero.
+If the Svadu extension is implemented, the ADUE bit controls whether hardware
+updating of PTE A/D bits is enabled for VS-stage address translation.
+When ADUE=1, hardware updating of PTE A/D bits is enabled during VS-stage
+address translation, and the implementation behaves as though the Svade
+extension were not implemented for VS-stage address translation.
+When ADUE=0, the implementation behaves as though Svade were implemented for
+VS-stage address translation.
+If Svadu is not implemented, ADUE is read-only zero.
+
The definition of the STCE field will be furnished by the forthcoming
Sstc extension. Its allocation within `henvcfg` may change prior to the
@@ -611,6 +634,10 @@ The definitions of the CBCFE and CBIE fields will be furnished by the
forthcoming Zicbom extension. Their allocations within `henvcfg` may
change prior to the ratification of that extension.
+The definition of the PMM field will be furnished by the forthcoming
+Ssnpm extension. Its allocation within `henvcfg` may change prior to the
+ratification of that extension.
+
When XLEN=32, `henvcfgh` is a
32-bit read/write register that aliases bits 63:32
of `henvcfg`. Register `henvcfgh` does not exist when
@@ -627,7 +654,7 @@ include::images/bytefield/hcounterenreg.edn[]
When the CY, TM, IR, or HPM_n_ bit in the `hcounteren` register is
clear, attempts to read the `cycle`, `time`, `instret`, or
-`hpmcounter` _n_ register while V=1 will cause a virtual instruction
+`hpmcounter` _n_ register while V=1 will cause a virtual-instruction
exception if the same bit in `mcounteren` is 1. When one of these bits
is set, access to the corresponding register is permitted when V=1,
unless prevented for some other reason. In VU-mode, a counter is not
@@ -749,7 +776,7 @@ page table; a virtual machine identifier (VMID), which facilitates
address-translation fences on a per-virtual-machine basis; and the MODE
field, which selects the address-translation scheme for guest physical
addresses. When `mstatus`.TVM=1, attempts to read or write `hgatp` while
-executing in HS-mode will raise an illegal instruction exception.
+executing in HS-mode will raise an illegal-instruction exception.
[[rv32hgatp]]
.Hypervisor guest address translation and protection register `hgatp` when HSXLEN=32.
@@ -819,7 +846,7 @@ Implementations are not required to support all defined MODE settings
when HSXLEN=64.
A write to `hgatp` with an unsupported MODE value is not ignored as it
-is for `satp`. Instead, the fields of `hgatp` are in the normal way,
+is for `satp`. Instead, the fields of `hgatp` are *WARL* in the normal way,
when so indicated.
As explained in <<guest-addr-translation>>, for the
@@ -1133,9 +1160,9 @@ RV32, HLVX.WU can be considered a variant of HLV.W, as sign extension is
irrelevant for 32-bit values.)
Attempts to execute a virtual-machine load/store instruction (HLV, HLVX,
-or HSV) when V=1 cause a virtual instruction trap. Attempts to execute
+or HSV) when V=1 cause a virtual-instruction exception. Attempts to execute
one of these same instructions from U-mode when `hstatus`.HU=0 cause an
-illegal instruction trap.
+illegal-instruction exception.
[[hfence.vma]]
==== Hypervisor Memory-Management Fence Instructions
@@ -1244,10 +1271,10 @@ _rs1_=`x0` (and _rs2_ set to either `x0` or the VMID) must be executed
to order subsequent guest translations with the MODE change—even if the
old MODE or new MODE is Bare.
-Attempts to execute HFENCE.VVMA or HFENCE.GVMA when V=1 cause a virtual
-instruction trap, while attempts to do the same in U-mode cause an
-illegal instruction trap. Attempting to execute HFENCE.GVMA in HS-mode
-when `mstatus`.TVM=1 also causes an illegal instruction trap.
+Attempts to execute HFENCE.VVMA or HFENCE.GVMA when V=1 cause a
+virtual-instruction exception, while attempts to do the same in U-mode cause an
+illegal-instruction exception. Attempting to execute HFENCE.GVMA in HS-mode
+when `mstatus`.TVM=1 also causes an illegal-instruction exception.
=== Machine-Level CSRs
@@ -1410,7 +1437,7 @@ include::images/bytefield/mtval2reg.edn[]
When a guest-page-fault trap is taken into M-mode, `mtval2` is written
with either zero or the guest physical address that faulted, shifted
right by 2 bits. For other traps, `mtval2` is set to zero, but a future
-standard or extension may redefine `mtval2`'s setting for other traps.
+standard or extension may redefine the setting of `mtval2` for other traps.
If a guest-page fault is due to an implicit memory access during
first-stage (VS-stage) address translation, a guest physical address
@@ -1585,10 +1612,11 @@ considered to be user-level accesses, as though executed in U-mode.
Access type permissions—readable, writable, or executable—are checked
during G-stage translation the same as for VS-stage translation. For a
memory access made to support VS-stage address translation (such as to
-read/write a VS-level page table), permissions are checked as though for
-a load or store, not for the original access type. However, any
-exception is always reported for the original access type (instruction,
-load, or store/AMO).
+read/write a VS-level page table), permissions and the need to set A
+and/or D bits at the G-stage level are checked as though for an implicit
+load or store, not for the original access type. However, any exception
+is always reported for the original access type (instruction, load, or
+store/AMO).
The G bit in all G-stage PTEs is reserved for future standard use. Until
its use is defined by a standard extension, it should be cleared by
@@ -1628,6 +1656,7 @@ VS-stage address translation, a nonzero guest physical address written
to `mtval2`/`htval` shall correspond to the exact virtual address
written to `mtval`/`stval`.
+[[hyp-mm-fences]]
==== Memory-Management Fences
The behavior of the SFENCE.VMA instruction is affected by the current
@@ -1665,22 +1694,39 @@ address-translation cache entries that have cached PMP settings
corresponding to the final translated supervisor physical address. An
HFENCE.VVMA instruction is not required.
+Similarly, if the setting of the PBMTE bit in `menvcfg` is changed, an
+HFENCE.GVMA instruction with _rs1_=`x0` and _rs2_=`x0` suffices to synchronize
+with respect to the altered interpretation of G-stage and VS-stage PTEs' PBMT
+fields.
+
+By contrast, if the PBMTE bit in `henvcfg` is changed, executing an
+HFENCE.VVMA with _rs1_=`x0` and _rs2_=`x0` suffices to synchronize with
+respect to the altered interpretation of VS-stage PTEs' PBMT fields for the
+currently active VMID.
+
+NOTE: No mechanism is provided to atomically change `vsatp` and `hgatp`
+together. Hence, to prevent speculative execution causing one guest's
+VS-stage translations to be cached under another guest's VMID, world-switch
+code should zero `vsatp`, then swap `hgatp`, then finally write the new
+`vsatp` value. Similarly, if `henvcfg`.PBMTE need be world-switched, it
+should be switched after zeroing `vsatp` but before writing the new `vsatp`
+value, obviating the need to execute an HFENCE.VVMA instruction.
+
=== Traps
+[[sec:hcauses]]
==== Trap Cause Codes
The hypervisor extension augments the trap cause encoding.
<<hcauses>> lists the possible M-mode and HS-mode
trap cause codes when the hypervisor extension is implemented. Codes are
added for VS-level interrupts (interrupts 2, 6, 10), for
-supervisor-level guest external interrupts (interrupt 12), for virtual
-instruction exceptions (exception 22), and for guest-page faults
+supervisor-level guest external interrupts (interrupt 12), for
+virtual-instruction exceptions (exception 22), and for guest-page faults
(exceptions 20, 21, 23). Furthermore, environment calls from VS-mode are
assigned cause 10, whereas those from HS-mode or S-mode use cause 9 as
usual.
-<<<
-
[[hcauses]]
.Machine and supervisor cause register (`mcause` and `scause`) values when the hypervisor extension is implemented.
[%autowidth,float="center",align="center",cols=">,>,<",options="header"]
@@ -1724,13 +1770,16 @@ Virtual supervisor external interrupt +
Machine external interrupt
|1 +
1 +
+1 +
1
|12 +
-13-15 +
+13 +
+14-15 +
&#8805;16
|Supervisor guest external interrupt +
+_Reserved for counter-overflow interrupt_ +
_Reserved_ +
-_Designated for platform or custom use_
+_Designated for platform use_
|0 +
0 +
0 +
@@ -1811,8 +1860,8 @@ _Reserved_
HS-mode and VS-mode ECALLs use different cause values so they can be
delegated separately.
-When V=1, a virtual instruction exception (code 22) is normally raised
-instead of an illegal instruction exception if the attempted instruction
+When V=1, a virtual-instruction exception (code 22) is normally raised
+instead of an illegal-instruction exception if the attempted instruction
is _HS-qualified_ but is prevented from executing when V=1 either due to
insufficient privilege or because the instruction is expressly disabled
by a supervisor or hypervisor CSR such as `scounteren` or `hcounteren`.
@@ -1823,18 +1872,18 @@ assuming fields TSR and TVM of CSR `mstatus` are both zero.
A special rule applies for CSR instructions that access 32-bit high-half
CSRs such as `cycleh` and `htimedeltah`. When V=1 and
XLEN=32, an invalid attempt to access a high-half CSR
-raises a virtual instruction
-exception instead of an illegal instruction exception if the same CSR
+raises a virtual-instruction
+exception instead of an illegal-instruction exception if the same CSR
instruction for the corresponding _low-half_ CSR (e.g., `cycle` or
`htimedelta`) is HS-qualified.
[NOTE]
====
When XLEN>32, an attempt to access a high-half CSR
-always raises an illegal instruction exception.
+always raises an illegal-instruction exception.
====
-Specifically, a virtual instruction exception is raised for the
+Specifically, a virtual-instruction exception is raised for the
following cases:
* in VS-mode, attempts to access a non-high-half counter CSR when the
@@ -1874,25 +1923,25 @@ implementation-specific, bounded time;
instruction or to access `satp`, when `hstatus`.VTVM=1.
Other extensions to the RISC-V Privileged Architecture may add to the
-set of circumstances that cause a virtual instruction exception when
+set of circumstances that cause a virtual-instruction exception when
V=1.
-On a virtual instruction trap, `mtval` or `stval` is written the same as
-for an illegal instruction trap.
+On a virtual-instruction trap, `mtval` or `stval` is written the same as
+for an illegal-instruction trap.
[NOTE]
====
It is not unusual that hypervisors must emulate the instructions that
-raise virtual instruction exceptions, to support nested hypervisors or
+raise virtual-instruction exceptions, to support nested hypervisors or
for other reasons. Machine level is expected ordinarily to delegate
-virtual instruction traps directly to HS-level, whereas illegal
-instruction traps are likely to be processed first in M-mode before
+virtual-instruction traps directly to HS-level, whereas
+illegal-instruction traps are likely to be processed first in M-mode before
being conditionally delegated (by software) to HS-level. Consequently,
-virtual instruction traps are expected typically to be handled faster
-than illegal instruction traps.
+virtual-instruction traps are expected typically to be handled faster
+than illegal-instruction traps.
When not emulating the trapping instruction, a hypervisor should convert
-a virtual instruction trap into an illegal instruction exception for the
+a virtual-instruction trap into an illegal-instruction exception for the
guest virtual machine.
***
@@ -2285,7 +2334,7 @@ nonzero value (the faulting guest physical address) is written to
<<pseudoinsts>>; zero is not allowed.
[[pseudoinsts]]
-.Special pseudoinstruction values for guest-page faults. The RV32 values are used when VSXLEN=32, and the TV64 values when VSXLEN=64.
+.Special pseudoinstruction values for guest-page faults. The RV32 values are used when VSXLEN=32, and the RV64 values when VSXLEN=64.
[%autowidth,float="center",align="center",cols="<,<",options="header"]
|===
|Value |Meaning
diff --git a/src/images/bytefield/hedelegreg.edn b/src/images/bytefield/hedelegreg.edn
index 8348f22..48c452c 100644
--- a/src/images/bytefield/hedelegreg.edn
+++ b/src/images/bytefield/hedelegreg.edn
@@ -7,11 +7,11 @@
(def right-margin 30)
(def boxes-per-row 32)
-(draw-box "HSXLEN-1" {:span 16 :text-anchor "start" :borders{}})
+(draw-box "63" {:span 16 :text-anchor "start" :borders{}})
(draw-box "0" {:span 16 :text-anchor "end" :borders{}})
(draw-box "Synchronous Exceptions" {:span 18 :text-anchor "end" :borders{:top :border-unrelated :bottom :border-unrelated :left :border-unrelated}})
(draw-box (text "(WARL)" { :font-weight "bold" :font-size 24}) {:span 14 :text-anchor "start" :borders{:top :border-unrelated :bottom :border-unrelated :right :border-unrelated}})
-(draw-box "HSXLEN" {:span 32 :borders {}})
----- \ No newline at end of file
+(draw-box "64" {:span 32 :borders {}})
+----
diff --git a/src/images/bytefield/henvcfg.edn b/src/images/bytefield/henvcfg.edn
index 812713c..4c03b50 100644
--- a/src/images/bytefield/henvcfg.edn
+++ b/src/images/bytefield/henvcfg.edn
@@ -5,12 +5,17 @@
(def row-header-fn nil)
(def left-margin 30)
(def right-margin 30)
-(def boxes-per-row 32)
+(def boxes-per-row 33)
(draw-box "63" {:span 2 :borders {}})
(draw-box "62" {:span 2 :borders {}})
-(draw-box "61" {:span 9 :text-anchor "start" :borders {}})
-(draw-box "8" {:span 9 :text-anchor "end" :borders {}})
+(draw-box "61" {:span 2 :borders {}})
+(draw-box "60" {:span 3 :text-anchor "start" :borders {}})
+(draw-box "34" {:span 4 :text-anchor "end" :borders {}})
+(draw-box "33" {:span 2 :text-anchor "start" :borders {}})
+(draw-box "32" {:span 1 :text-anchor "end" :borders {}})
+(draw-box "31" {:span 4 :text-anchor "start" :borders {}})
+(draw-box "8" {:span 3 :text-anchor "end" :borders {}})
(draw-box "7" {:span 2 :borders {}})
(draw-box "6" {:span 2 :borders {}})
(draw-box "5" {:borders {}})
@@ -21,7 +26,10 @@
(draw-box "STCE" {:span 2})
(draw-box "PBMTE" {:font-size 22 :span 2})
-(draw-box (text "WPRI" {:font-size 24 :font-weight "bold"}) {:span 18})
+(draw-box "ADUE" {:span 2})
+(draw-box (text "WPRI" {:font-size 24 :font-weight "bold"}) {:span 7})
+(draw-box "PMM" {:span 3})
+(draw-box (text "WPRI" {:font-size 24 :font-weight "bold"}) {:span 7})
(draw-box "CBZE" {:span 2})
(draw-box "CBCFE" {:span 2})
(draw-box "CBIE" {:span 2})
@@ -30,10 +38,13 @@
(draw-box "1" {:span 2 :borders {}})
(draw-box "1" {:span 2 :borders {}})
-(draw-box "54" {:span 18 :borders {}})
+(draw-box "1" {:span 2 :borders {}})
+(draw-box "27" {:span 7 :borders {}})
+(draw-box "2" {:span 3 :borders {}})
+(draw-box "24" {:span 7 :borders {}})
(draw-box "1" {:span 2 :borders {}})
(draw-box "1" {:span 2 :borders {}})
(draw-box "2" {:span 2 :borders {}})
(draw-box "3" {:span 2 :borders {}})
(draw-box "1" {:span 2 :borders {}})
----- \ No newline at end of file
+----
diff --git a/src/images/bytefield/hstatusreg-rv32.edn b/src/images/bytefield/hstatusreg-rv32.edn
index 02db585..2762ce6 100644
--- a/src/images/bytefield/hstatusreg-rv32.edn
+++ b/src/images/bytefield/hstatusreg-rv32.edn
@@ -51,9 +51,9 @@
(draw-box "6" {:span 5 :borders {}})
(draw-box "2" {:span 2 :borders {}})
(draw-box "1" {:borders {}})
-(draw-box "2" {:span 2 :borders {}})
+(draw-box "1" {:span 2 :borders {}})
(draw-box "1" {:span 3 :borders {}})
(draw-box "1" {:span 3 :borders {}})
-(draw-box "2" {:span 2 :borders {}})
+(draw-box "1" {:span 2 :borders {}})
(draw-box "5" {:span 2 :borders {}})
----
diff --git a/src/images/bytefield/hstatusreg.edn b/src/images/bytefield/hstatusreg.edn
index cff75db..cce601e 100644
--- a/src/images/bytefield/hstatusreg.edn
+++ b/src/images/bytefield/hstatusreg.edn
@@ -8,7 +8,7 @@
(def boxes-per-row 32)
(draw-box nil {:span 3 :borders {}})
-(draw-box "HSXLEN-1" {:span 8 :borders {} :text-anchor "start"})
+(draw-box "63" {:span 8 :borders {} :text-anchor "start"})
(draw-box "34" {:borders {}})
(draw-box "33" {:span 2 :borders {} :text-anchor "start"})
(draw-box "32" {:span 2 :borders {} :text-anchor "end"})
@@ -31,7 +31,7 @@
(draw-box nil {:span 3 :borders {}})
(draw-box nil {:span 3 :borders {}})
-(draw-box "HSXLEN-34" {:span 9 :borders {}})
+(draw-box "30" {:span 9 :borders {}})
(draw-box "2" {:span 4 :borders {}})
(draw-box "9" {:span 6 :borders {}})
(draw-box "1" {:span 2 :borders {}})
diff --git a/src/images/bytefield/hypv-mstatus.edn b/src/images/bytefield/hypv-mstatus.edn
index 2ed4a4d..885dc00 100644
--- a/src/images/bytefield/hypv-mstatus.edn
+++ b/src/images/bytefield/hypv-mstatus.edn
@@ -7,8 +7,8 @@
(def right-margin 30)
(def boxes-per-row 32)
-(draw-box "MSXLEN-1" {:span 3 :borders {}})
-(draw-box "MXLEN-2" {:span 4 :text-anchor "start" :borders {}})
+(draw-box "63" {:span 3 :borders {}})
+(draw-box "62" {:span 4 :text-anchor "start" :borders {}})
(draw-box "40" {:span 4 :text-anchor "end" :borders {}})
(draw-box "39" {:span 3 :borders {}})
(draw-box "38" {:span 3 :borders {}})
@@ -31,7 +31,7 @@
(draw-box nil {:borders {:top :border-unrelated :bottom :border-unrelated}})
(draw-box "1" {:span 3 :borders {}})
-(draw-box "MXLEN-41" {:span 8 :borders {}})
+(draw-box "23" {:span 8 :borders {}})
(draw-box "1" {:span 3 :borders {}})
(draw-box "1" {:span 3 :borders {}})
(draw-box "1" {:span 3 :borders {}})
diff --git a/src/images/bytefield/medeleg.adoc b/src/images/bytefield/medeleg.adoc
index 2abd97b..a63156d 100644
--- a/src/images/bytefield/medeleg.adoc
+++ b/src/images/bytefield/medeleg.adoc
@@ -7,11 +7,11 @@
(def right-margin 100)
(def boxes-per-row 32)
-(draw-box "MXLEN-1" {:span 16 :text-anchor "start" :borders {}})
+(draw-box "63" {:span 16 :text-anchor "start" :borders {}})
(draw-box "0" {:span 16 :text-anchor "end" :borders {}})
(draw-box "Synchronous Exceptions" {:font-size 18 :span 18 :text-anchor "end" :borders {:top :border-unrelated :bottom :border-unrelated :left :border-unrelated}})
(draw-box (text "(WARL)" {:font-weight "bold"}) {:font-size 18 :span 14 :text-anchor "start" :borders {:top :border-unrelated :bottom :border-unrelated :right :border-unrelated}})
-(draw-box "MXLEN" {:font-size 24 :span 32 :borders {}})
----- \ No newline at end of file
+(draw-box "64" {:font-size 24 :span 32 :borders {}})
+----
diff --git a/src/images/bytefield/menvcfgreg.adoc b/src/images/bytefield/menvcfgreg.adoc
index dc90900..979634a 100644
--- a/src/images/bytefield/menvcfgreg.adoc
+++ b/src/images/bytefield/menvcfgreg.adoc
@@ -3,12 +3,15 @@
(defattrs :plain [:plain {:font-family "M+ 1p Fallback"}])
(def row-height 45)
(def row-header-fn nil)
-(def boxes-per-row 32)
-(draw-column-headers {:height 20 :font-size 18 :labels (reverse ["0" "" "1" "3" "4" "5" "6" "" "7" "" "8" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "61" "" "62" "" "63"])})
+(def boxes-per-row 33)
+(draw-column-headers {:height 20 :font-size 18 :labels (reverse ["0" "" "1" "3" "4" "5" "6" "" "7" "" "8" "" "" "" "" "" "31" "32" "" "33" "34" "" "" "" "" "" "60" "" "61" "" "62" "" "63"])})
(draw-box "STCE" {:span 2})
(draw-box "PBMTE" {:span 2})
-(draw-box (text "WPRI" {:font-weight "bold"}) {:span 18})
+(draw-box "ADUE" {:span 2})
+(draw-box (text "WPRI" {:font-weight "bold"}) {:span 7})
+(draw-box "PMM" {:span 3})
+(draw-box (text "WPRI" {:font-weight "bold"}) {:span 7})
(draw-box "CBZE" {:span 2})
(draw-box "CBCFE" {:span 2})
(draw-box "CBIE" {:span 2})
@@ -17,10 +20,13 @@
(draw-box "1" {:span 2 :borders {}})
(draw-box "1" {:span 2 :borders {}})
-(draw-box "54" {:span 18 :borders {}})
+(draw-box "1" {:span 2 :borders {}})
+(draw-box "27" {:span 7 :borders {}})
+(draw-box "2" {:span 3 :borders {}})
+(draw-box "24" {:span 7 :borders {}})
(draw-box "1" {:span 2 :borders {}})
(draw-box "1" {:span 2 :borders {}})
(draw-box "2" {:span 2 :borders {}})
(draw-box "3" {:span 2 :borders {}})
(draw-box "1" {:span 2 :borders {}})
----- \ No newline at end of file
+----
diff --git a/src/images/bytefield/miereg-standard.adoc b/src/images/bytefield/miereg-standard.adoc
index d069e9e..680fb1c 100644
--- a/src/images/bytefield/miereg-standard.adoc
+++ b/src/images/bytefield/miereg-standard.adoc
@@ -6,9 +6,11 @@
(def left-margin 100)
(def right-margin 100)
(def boxes-per-row 16)
-(draw-column-headers {:labels (reverse ["0" "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "" "" "15"])})
+(draw-column-headers {:labels (reverse ["0" "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15"])})
-(draw-box "0" {:span 4})
+(draw-box "0" {:span 2})
+(draw-box (text "LCOFIE" {:font-size 10}) {:span 1})
+(draw-box "0" {:span 1})
(draw-box "MEIE" {:span 1})
(draw-box "0" {:span 1})
(draw-box "SEIE" {:span 1})
@@ -22,7 +24,8 @@
(draw-box "SSIE" {:span 1})
(draw-box "0" {:span 1})
-(draw-box "4" {:span 4 :borders {}})
+(draw-box "2" {:span 2 :borders {}})
+(draw-box "1" {:span 1 :borders {}})
(draw-box "1" {:span 1 :borders {}})
(draw-box "1" {:span 1 :borders {}})
(draw-box "1" {:span 1 :borders {}})
@@ -35,4 +38,5 @@
(draw-box "1" {:span 1 :borders {}})
(draw-box "1" {:span 1 :borders {}})
(draw-box "1" {:span 1 :borders {}})
----- \ No newline at end of file
+(draw-box "1" {:span 1 :borders {}})
+----
diff --git a/src/images/bytefield/mipreg-standard.adoc b/src/images/bytefield/mipreg-standard.adoc
index 2b33776..e32e302 100644
--- a/src/images/bytefield/mipreg-standard.adoc
+++ b/src/images/bytefield/mipreg-standard.adoc
@@ -6,9 +6,11 @@
(def left-margin 100)
(def right-margin 100)
(def boxes-per-row 16)
-(draw-column-headers {:labels (reverse ["0" "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "" "" "15"])})
+(draw-column-headers {:labels (reverse ["0" "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15"])})
-(draw-box "0" {:span 4})
+(draw-box "0" {:span 2})
+(draw-box (text "LCOFIP" {:font-size 10}) {:span 1})
+(draw-box "0" {:span 1})
(draw-box "MEIP" {:span 1})
(draw-box "0" {:span 1})
(draw-box "SEIP" {:span 1})
@@ -22,7 +24,8 @@
(draw-box "SSIP" {:span 1})
(draw-box "0" {:span 1})
-(draw-box "4" {:span 4 :borders {}})
+(draw-box "2" {:span 2 :borders {}})
+(draw-box "1" {:span 1 :borders {}})
(draw-box "1" {:span 1 :borders {}})
(draw-box "1" {:span 1 :borders {}})
(draw-box "1" {:span 1 :borders {}})
@@ -35,4 +38,5 @@
(draw-box "1" {:span 1 :borders {}})
(draw-box "1" {:span 1 :borders {}})
(draw-box "1" {:span 1 :borders {}})
----- \ No newline at end of file
+(draw-box "1" {:span 1 :borders {}})
+----
diff --git a/src/images/bytefield/mncause.edn b/src/images/bytefield/mncause.edn
index 5323f24..0b56e9b 100644
--- a/src/images/bytefield/mncause.edn
+++ b/src/images/bytefield/mncause.edn
@@ -8,9 +8,9 @@
(def boxes-per-row 32)
(draw-column-headers {:height 24 :font-size 24 :labels (reverse ["0" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "MXLEN-2" "" "" "" "MXLEN-1" ""])})
-(draw-box "1" {:span 4})
-(draw-box (text "NMI Cause" {:font-size 24}) {:span 14 :text-anchor "end" :borders {:top :border-unrelated :bottom :border-unrelated :left :border-unrelated}})
+(draw-box "Interrupt" {:span 4})
+(draw-box (text "Exception Code" {:font-size 24}) {:span 14 :text-anchor "end" :borders {:top :border-unrelated :bottom :border-unrelated :left :border-unrelated}})
(draw-box (text "(WARL)" {:font-weight "bold" :font-size 24}) {:span 14 :text-anchor "start" :borders {:top :border-unrelated :right :border-unrelated :bottom :border-unrelated}})
(draw-box "1" {:span 4 :borders {}})
(draw-box "MXLEN-1" {:font-size 24 :span 28 :borders {}})
----- \ No newline at end of file
+----
diff --git a/src/images/bytefield/mnstatus.edn b/src/images/bytefield/mnstatus.edn
index 186bfb8..8a5f39d 100644
--- a/src/images/bytefield/mnstatus.edn
+++ b/src/images/bytefield/mnstatus.edn
@@ -5,25 +5,29 @@
(def row-header-fn nil)
(def left-margin 30)
(def right-margin 30)
-(def boxes-per-row 32)
-(draw-column-headers {:height 24 :font-size 24 :labels (reverse ["0" "" "" "2" "" "3" "4" "" "" "6" "" "" "" "7" "" "" "8" "" "" "10" "11" "" "" "12" "13" "" "" "" "" "" "MXLEN-1" ""])})
+(def boxes-per-row 35)
+(draw-column-headers {:height 24 :font-size 24 :labels (reverse ["0" "" "" "2" "" "3" "4" "" "" "6" "" "" "7" "" "" "8" "" "" "9" "" "" "10" "" "11" "" "" "12" "13" "" "" "" "" "" "MXLEN-1" ""])})
(draw-box (text "Reserved" {:font-style "italic" :font-size 24}) {:span 8})
(draw-box (text "MNPP" {:font-size 24}) {:span 2 :text-anchor "end" :borders {:top :border-unrelated :bottom :border-unrelated :left :border-unrelated}})
(draw-box (text "(WARL)" {:font-weight "bold" :font-size 20}) {:span 2 :text-anchor "start" :borders {:top :border-unrelated :right :border-unrelated :bottom :border-unrelated}})
-(draw-box (text "Reserved" {:font-style "italic" :font-size 24}) {:span 4})
-(draw-box (text "MNPV" {:font-size 24}) {:span 3 :text-anchor "end" :borders {:top :border-unrelated :bottom :border-unrelated :left :border-unrelated}})
-(draw-box (text "(WARL)" {:font-weight "bold" :font-size 24}) {:span 3 :text-anchor "start" :borders {:top :border-unrelated :right :border-unrelated :bottom :border-unrelated}})
+(draw-box (text "Reserved" {:font-style "italic" :font-size 24}) {:span 3})
+(draw-box (text "MNPELP" {:font-style "italic" :font-size 20}) {:span 3})
+(draw-box (text "Reserved" {:font-style "italic" :font-size 20}) {:span 3})
+(draw-box (text "MNPV" {:font-size 24}) {:span 2 :text-anchor "end" :borders {:top :border-unrelated :bottom :border-unrelated :left :border-unrelated}})
+(draw-box (text "(WARL)" {:font-weight "bold" :font-size 20}) {:span 2 :text-anchor "start" :borders {:top :border-unrelated :right :border-unrelated :bottom :border-unrelated}})
(draw-box (text "Reserved" {:font-style "italic" :font-size 24}) {:span 4})
(draw-box "NMIE" {:span 2})
(draw-box (text "Reserved" {:font-style "italic" :font-size 24}) {:span 4})
(draw-box "MXLEN-13" {:span 8 :borders {}})
(draw-box "2" {:span 4 :borders {}})
-(draw-box "3" {:span 4 :borders {}})
-(draw-box "1" {:span 6 :borders {}})
+(draw-box "1" {:span 3 :borders {}})
+(draw-box "1" {:span 3 :borders {}})
+(draw-box "1" {:span 3 :borders {}})
+(draw-box "1" {:span 4 :borders {}})
(draw-box "3" {:span 4 :borders {}})
(draw-box "1" {:span 2 :borders {}})
(draw-box "3" {:span 4 :borders {}})
----- \ No newline at end of file
+----
diff --git a/src/images/bytefield/mseccfg.adoc b/src/images/bytefield/mseccfg.adoc
index cdf4803..1c8cc35 100644
--- a/src/images/bytefield/mseccfg.adoc
+++ b/src/images/bytefield/mseccfg.adoc
@@ -4,9 +4,11 @@
(def row-height 45)
(def row-header-fn nil)
(def boxes-per-row 32)
-(draw-column-headers {:height 20 :font-size 18 :labels (reverse ["" "0" "" "1" "" "2" "" "3" "" "7" "" "8" "" "" "9" "" "10" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "63"])})
+(draw-column-headers {:height 20 :font-size 18 :labels (reverse ["" "0" "" "1" "" "2" "" "3" "" "7" "" "8" "" "" "9" "" "10" "" "" "" "31" "32" "" "33" "34" "" "" "" "" "" "" "63"])})
-(draw-box (text "WPRI" {:font-weight "bold"}) {:span 16})
+(draw-box (text "WPRI" {:font-weight "bold"}) {:span 8})
+(draw-box "PMM" {:span 3})
+(draw-box (text "WPRI" {:font-weight "bold"}) {:span 5})
(draw-box "SSEED" {:span 3})
(draw-box "USEED" {:span 3})
(draw-box (text "WPRI" {:font-weight "bold"}) {:span 3})
@@ -14,7 +16,9 @@
(draw-box "MMWP" {:span 3})
(draw-box "MML" {:span 2})
-(draw-box "54" {:span 16 :borders {}})
+(draw-box "30" {:span 8 :borders {}})
+(draw-box "2" {:span 3 :borders {}})
+(draw-box "22" {:span 5 :borders {}})
(draw-box "1" {:span 3 :borders {}})
(draw-box "1" {:span 3 :borders {}})
(draw-box "5" {:span 3 :borders {}})
diff --git a/src/images/bytefield/priv-instr-set.edn b/src/images/bytefield/priv-instr-set.edn
index 4c79ce5..c0805c7 100644
--- a/src/images/bytefield/priv-instr-set.edn
+++ b/src/images/bytefield/priv-instr-set.edn
@@ -113,7 +113,7 @@
(draw-box (text "Hypervisor Virtual-Machine Load and Store Instructions" {:font-weight "bold" :font-size 24}) {:span 27 :borders {}})
(draw-box nil {:span 5 :borders {}})
-(draw-box "0110001" {:span 8})
+(draw-box "0110000" {:span 8})
(draw-box "00000" {:span 4})
(draw-box "rs1" {:span 4})
(draw-box "100" {:span 3})
@@ -121,7 +121,7 @@
(draw-box "1110011" {:span 4})
(draw-box "HLV.B" {:span 5 :text-anchor "start" :borders {}})
-(draw-box "0110001" {:span 8})
+(draw-box "0110000" {:span 8})
(draw-box "00001" {:span 4})
(draw-box "rs1" {:span 4})
(draw-box "100" {:span 3})
@@ -161,7 +161,7 @@
(draw-box "1110011" {:span 4})
(draw-box "HLVX.HU" {:span 5 :text-anchor "start" :borders {}})
-(draw-box "0110010" {:span 8})
+(draw-box "0110100" {:span 8})
(draw-box "00011" {:span 4})
(draw-box "rs1" {:span 4})
(draw-box "100" {:span 3})
@@ -264,4 +264,4 @@
(draw-box "00000" {:span 4})
(draw-box "1110011" {:span 4})
(draw-box " HINVAL.GVMA" {:span 5 :text-anchor "start" :borders {}})
----- \ No newline at end of file
+----
diff --git a/src/images/bytefield/rv32satp.edn b/src/images/bytefield/rv32satp.edn
index 446c25a..5d388a5 100644
--- a/src/images/bytefield/rv32satp.edn
+++ b/src/images/bytefield/rv32satp.edn
@@ -21,6 +21,6 @@
(draw-box (text "(WARL)" {:font-weight "bold" :font-size 24}) {:span 8 :text-anchor "start" :borders {:top :border-unrelated :bottom :border-unrelated :right :border-unrelated}})
(draw-box "1" {:span 8 :borders {}})
-(draw-box "16" {:span 8 :borders {}})
+(draw-box "9" {:span 8 :borders {}})
(draw-box "22" {:span 16 :borders {}})
---- \ No newline at end of file
diff --git a/src/images/bytefield/rvc-instr-quad1.adoc b/src/images/bytefield/rvc-instr-quad1.adoc
index ea08909..e0f6073 100644
--- a/src/images/bytefield/rvc-instr-quad1.adoc
+++ b/src/images/bytefield/rvc-instr-quad1.adoc
@@ -45,7 +45,7 @@
(draw-box "011" {:span 3})
(draw-box "imm[9]" {:span 1})
(draw-box "2" {:span 5})
-(draw-box "imm[4:0]" {:span 5})
+(draw-box "imm[4|6|8:7|5]" {:span 5})
(draw-box "01" {:span 2})
(draw-box (text "C.ADDI16SP" :math [:sub "(RES, imm=0)"]) {:span 3 :text-anchor "start" :borders {}})
@@ -112,7 +112,7 @@
(draw-box "01" {:span 2})
(draw-box "rs2′" {:span 3})
(draw-box "01" {:span 2})
-(draw-box (text "C.XOR" :math [:sub "(RV64/128)"]) {:span 3 :text-anchor "start" :borders {}})
+(draw-box "C.XOR" {:span 3 :text-anchor "start" :borders {}})
(draw-box "100" {:span 3})
(draw-box "0" {:span 1})
diff --git a/src/images/bytefield/senvcfg.edn b/src/images/bytefield/senvcfg.edn
index 077541b..a60cfa6 100644
--- a/src/images/bytefield/senvcfg.edn
+++ b/src/images/bytefield/senvcfg.edn
@@ -7,8 +7,12 @@
(def right-margin 30)
(def boxes-per-row 32)
-(draw-box "SXLEN-1" {:span 11 :text-anchor "start" :borders {}})
-(draw-box "8" {:span 11 :text-anchor "end" :borders {}})
+(draw-box "SXLEN-1" {:span 5 :text-anchor "start" :borders {}})
+(draw-box "34" {:span 5 :text-anchor "end" :borders {}})
+(draw-box "33" {:span 2 :text-anchor "start" :borders {}})
+(draw-box "32" {:span 1 :text-anchor "end" :borders {}})
+(draw-box "31" {:span 5 :text-anchor "start" :borders {}})
+(draw-box "8" {:span 4 :text-anchor "end" :borders {}})
(draw-box "7" {:span 2 :borders {}})
(draw-box "6" {:span 2 :borders {}})
(draw-box "5" {:text-anchor "start" :borders {}})
@@ -17,18 +21,22 @@
(draw-box "1" {:text-anchor "end" :borders {}})
(draw-box "0" {:span 2 :borders {}})
-(draw-box (text "WPRI" {:font-weight "bold" :font-size 24}) {:span 22})
+(draw-box (text "WPRI" {:font-size 24 :font-weight "bold"}) {:span 10})
+(draw-box "PMM" {:span 3})
+(draw-box (text "WPRI" {:font-size 24 :font-weight "bold"}) {:span 9})
(draw-box "CBZE" {:span 2})
(draw-box "CBCFE" {:span 2})
(draw-box "CBIE" {:span 2})
(draw-box (text "WPRI" {:font-weight "bold" :font-size 24}) {:span 2})
(draw-box "FIOM" {:span 2})
-(draw-box "SXLEN-8" {:span 22 :borders {}})
+(draw-box "SXLEN-34" {:span 10 :borders {}})
+(draw-box "2" {:span 3 :borders {}})
+(draw-box "24" {:span 9 :borders {}})
(draw-box "1" {:span 2 :borders {}})
(draw-box "1" {:span 2 :borders {}})
(draw-box "2" {:span 2 :borders {}})
(draw-box "3" {:span 2 :borders {}})
(draw-box "1" {:span 2 :borders {}})
----- \ No newline at end of file
+----
diff --git a/src/images/bytefield/siereg-standard.edn b/src/images/bytefield/siereg-standard.edn
index a4e2cf3..4a1fba6 100644
--- a/src/images/bytefield/siereg-standard.edn
+++ b/src/images/bytefield/siereg-standard.edn
@@ -8,8 +8,11 @@
(def boxes-per-row 32)
(draw-box nil {:span 7 :borders {}})
-(draw-box "15" {:span 3 :text-anchor "start" :borders {}})
-(draw-box "10" {:span 3 :text-anchor "end" :borders {}})
+(draw-box "15" {:text-anchor "start" :borders {}})
+(draw-box "14" {:text-anchor "end" :borders {}})
+(draw-box "13" {:span 2 :borders {}})
+(draw-box "12" {:text-anchor "start" :borders {}})
+(draw-box "10" {:text-anchor "end" :borders {}})
(draw-box "9" {:span 2 :borders {}})
(draw-box "8" {:text-anchor "start" :borders {}})
(draw-box "6" {:text-anchor "end" :borders {}})
@@ -21,7 +24,9 @@
(draw-box nil {:span 8 :borders {}})
(draw-box nil {:span 7 :borders {}})
-(draw-box "0" {:span 6})
+(draw-box "0" {:span 2})
+(draw-box (text "LCOFIE" {:font-size 20}) {:span 2})
+(draw-box "0" {:span 2})
(draw-box "SEIE" {:span 2})
(draw-box "0" {:span 2})
(draw-box "STIE" {:span 2})
@@ -31,7 +36,9 @@
(draw-box nil {:span 8 :borders {}})
(draw-box nil {:span 7 :borders {}})
-(draw-box "6" {:span 6 :borders {}})
+(draw-box "2" {:span 2 :borders {}})
+(draw-box "1" {:span 2 :borders {}})
+(draw-box "3" {:span 2 :borders {}})
(draw-box "1" {:span 2 :borders {}})
(draw-box "3" {:span 2 :borders {}})
(draw-box "1" {:span 2 :borders {}})
@@ -39,4 +46,4 @@
(draw-box "1" {:span 2 :borders {}})
(draw-box "1" {:borders {}})
(draw-box nil {:span 8 :borders {}})
----- \ No newline at end of file
+----
diff --git a/src/images/bytefield/sipreg-standard.edn b/src/images/bytefield/sipreg-standard.edn
index 34bbfb4..440fd8f 100644
--- a/src/images/bytefield/sipreg-standard.edn
+++ b/src/images/bytefield/sipreg-standard.edn
@@ -8,8 +8,11 @@
(def boxes-per-row 32)
(draw-box nil {:span 7 :borders {}})
-(draw-box "15" {:span 3 :text-anchor "start" :borders {}})
-(draw-box "10" {:span 3 :text-anchor "end" :borders {}})
+(draw-box "15" {:text-anchor "start" :borders {}})
+(draw-box "14" {:text-anchor "end" :borders {}})
+(draw-box "13" {:span 2 :borders {}})
+(draw-box "12" {:text-anchor "start" :borders {}})
+(draw-box "10" {:text-anchor "end" :borders {}})
(draw-box "9" {:span 2 :borders {}})
(draw-box "8" {:text-anchor "start" :borders {}})
(draw-box "6" {:text-anchor "end" :borders {}})
@@ -21,7 +24,9 @@
(draw-box nil {:span 8 :borders {}})
(draw-box nil {:span 7 :borders {}})
-(draw-box "0" {:span 6})
+(draw-box "0" {:span 2})
+(draw-box (text "LCOFIP" {:font-size 20}) {:span 2})
+(draw-box "0" {:span 2})
(draw-box "SEIP" {:span 2})
(draw-box "0" {:span 2})
(draw-box "STIP" {:span 2})
@@ -31,7 +36,9 @@
(draw-box nil {:span 8 :borders {}})
(draw-box nil {:span 7 :borders {}})
-(draw-box "6" {:span 6 :borders {}})
+(draw-box "2" {:span 2 :borders {}})
+(draw-box "1" {:span 2 :borders {}})
+(draw-box "3" {:span 2 :borders {}})
(draw-box "1" {:span 2 :borders {}})
(draw-box "3" {:span 2 :borders {}})
(draw-box "1" {:span 2 :borders {}})
@@ -39,4 +46,4 @@
(draw-box "1" {:span 2 :borders {}})
(draw-box "1" {:borders {}})
(draw-box nil {:span 8 :borders {}})
----- \ No newline at end of file
+----
diff --git a/src/images/bytefield/vsepcreg.edn b/src/images/bytefield/vsepcreg.edn
index 16a2e6b..fb6c757 100644
--- a/src/images/bytefield/vsepcreg.edn
+++ b/src/images/bytefield/vsepcreg.edn
@@ -7,7 +7,7 @@
(def right-margin 30)
(def boxes-per-row 32)
-(draw-box "VSXLEN" {:span 16 :text-anchor "start" :borders {}})
+(draw-box "VSXLEN-1" {:span 16 :text-anchor "start" :borders {}})
(draw-box "0" {:span 16 :text-anchor "end" :borders {}})
(draw-box "vsepc" {:span 32})
(draw-box "VSXLEN" {:span 32 :borders {}})
diff --git a/src/images/bytefield/vsscratchreg.edn b/src/images/bytefield/vsscratchreg.edn
index c3759c5..8eb14d4 100644
--- a/src/images/bytefield/vsscratchreg.edn
+++ b/src/images/bytefield/vsscratchreg.edn
@@ -7,7 +7,7 @@
(def right-margin 30)
(def boxes-per-row 32)
-(draw-box "VSXLEN" {:span 16 :text-anchor "start" :borders {}})
+(draw-box "VSXLEN-1" {:span 16 :text-anchor "start" :borders {}})
(draw-box "0" {:span 16 :text-anchor "end" :borders {}})
(draw-box "vsscratch" {:span 32})
(draw-box "VSXLEN" {:span 32 :borders {}})
diff --git a/src/images/bytefield/vsstatusreg.edn b/src/images/bytefield/vsstatusreg.edn
index 87f4725..95780a6 100644
--- a/src/images/bytefield/vsstatusreg.edn
+++ b/src/images/bytefield/vsstatusreg.edn
@@ -7,8 +7,8 @@
(def right-margin 30)
(def boxes-per-row 32)
-(draw-box "VSXLEN-1" {:span 3 :borders {}})
-(draw-box "VSXLEN-2" {:span 5 :text-anchor "start" :borders {}})
+(draw-box "63" {:span 3 :borders {}})
+(draw-box "62" {:span 5 :text-anchor "start" :borders {}})
(draw-box "34" {:span 5 :text-anchor "end" :borders {}})
(draw-box "33" {:span 2 :text-anchor "start" :borders {}})
(draw-box "32" {:span 2 :text-anchor "end" :borders {}})
@@ -30,7 +30,7 @@
(draw-box nil {:span 2 :borders {}})
(draw-box "1" {:span 3 :borders {}})
-(draw-box "VSXLEN-35" {:span 10 :borders {}})
+(draw-box "29" {:span 10 :borders {}})
(draw-box "2" {:span 4 :borders {}})
(draw-box "12" {:span 6 :borders {}})
(draw-box "1" {:span 2 :borders {}})
diff --git a/src/images/riscv-horizontal-color.svg b/src/images/riscv-horizontal-color.svg
new file mode 100644
index 0000000..be6e6b9
--- /dev/null
+++ b/src/images/riscv-horizontal-color.svg
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Generator: Adobe Illustrator 26.4.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
+<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
+ viewBox="0 0 1000 175.4" style="enable-background:new 0 0 1000 175.4;" xml:space="preserve">
+<style type="text/css">
+ .st0{fill:#E6AC2C;}
+ .st1{fill:#2C356D;}
+</style>
+<g>
+ <g>
+ <path class="st0" d="M91.6,58.2c0,22.9-13.9,43.7-40.9,48.6l38.1,45.1l3.4-4.8L161,49.9V9H49.3C77.7,11.7,91.6,35.3,91.6,58.2z"/>
+ <path class="st1" d="M13.2,85.9h26.4C59,85.9,68.7,72,68.7,58.2c0-13.9-9.7-27.1-29.1-27.1H3.5v135.3h67.3L13.2,97.1V85.9z
+ M111,159.5l50.6-70.8v77.7H106L111,159.5z"/>
+ <rect x="392.7" y="35.3" class="st1" width="22.9" height="107.5"/>
+ <path class="st1" d="M552,119.3l-119.1-0.1v23.6h120c9,0,16.7-3.5,22.9-9.7s9.7-13.9,9.7-22.9s-3.5-16.6-9.7-22.9
+ c-6.2-6.2-13.9-9.7-22.9-9.7L465.6,77c-5.1,0-9.3-4.2-9.4-9.3l0,0l0,0c0-5.2,4.2-9.3,9.4-9.3l119.9-0.1v-23h-120
+ c-9,0-16.7,3.5-22.9,9.7c-6.2,6.2-9.7,13.9-9.7,22.9s3.5,16.6,9.7,22.9c6.2,6.2,13.9,9,22.9,9h87.2c5.2,0,9.4,4.2,9.3,9.4l0,0l0,0
+ C562.1,114.8,557.6,119.3,552,119.3z"/>
+ <path class="st1" d="M650,35.3h99.2v22.9H650c-8.3,0-15.2,2.7-21.5,9c-6.2,6.2-9,13.2-9,21.5s2.8,15.2,9,21.5
+ c6.3,6.2,13.2,9,21.5,9h99.2v23.6H650c-14.6,0-27.1-5.6-37.5-16s-15.2-22.9-15.2-37.5s4.8-27.1,15.2-37.5
+ C623,40.9,635.4,35.3,650,35.3z"/>
+ <path class="st1" d="M342,78.5l-95.7-0.2V58.9l96.7-0.2c5.2,0,9.5,4.2,9.5,9.4l0,0l0,0C352.4,73.9,347.8,78.5,342,78.5z
+ M376,142.8l-30.5-42.3c8.3-0.7,15.2-3.5,20.8-9.7c6.2-6.3,9.7-13.9,9.7-22.9s-3.5-16.7-9.7-22.9s-13.9-9.7-22.9-9.7h-120v107.5
+ h22.9v-42.3h70.8l30.5,42.3H376z"/>
+ <polyline class="st0" points="863.7,142.8 800.8,35.3 827.7,35.3 876.8,120.6 926,35.3 952.5,35.3 890,142.8 "/>
+ <rect x="763.1" y="79" class="st0" width="45.1" height="20.8"/>
+ </g>
+ <g>
+ <path class="st1" d="M996.5,52.5c0,9.4-7.3,16.7-16.9,16.7c-9.5,0-17-7.3-17-16.7c0-9.2,7.5-16.5,17-16.5
+ C989.1,36,996.5,43.3,996.5,52.5z M966.8,52.5c0,7.3,5.4,13.2,12.9,13.2c7.2,0,12.6-5.8,12.6-13.1s-5.3-13.3-12.7-13.3
+ S966.8,45.2,966.8,52.5z M977,61.1h-3.8V44.6c1.5-0.3,3.6-0.5,6.3-0.5c3.1,0,4.5,0.5,5.7,1.2c0.9,0.7,1.6,2,1.6,3.6
+ c0,1.8-1.4,3.2-3.4,3.8V53c1.6,0.6,2.5,1.8,3,4c0.5,2.5,0.8,3.5,1.2,4.1h-4.1c-0.5-0.6-0.8-2.1-1.3-4c-0.3-1.8-1.3-2.6-3.4-2.6
+ H977V61.1L977,61.1z M977,51.8h1.8c2.1,0,3.8-0.7,3.8-2.4c0-1.5-1.1-2.5-3.5-2.5c-1,0-1.7,0.1-2.1,0.2V51.8z"/>
+ </g>
+</g>
+</svg>
diff --git a/src/images/smepmp-visual-representation.png b/src/images/smepmp-visual-representation.png
new file mode 100644
index 0000000..9502271
--- /dev/null
+++ b/src/images/smepmp-visual-representation.png
Binary files differ
diff --git a/src/images/wavedrom/c-ci.adoc b/src/images/wavedrom/c-ci.adoc
index 3651f61..7dae51e 100644
--- a/src/images/wavedrom/c-ci.adoc
+++ b/src/images/wavedrom/c-ci.adoc
@@ -5,7 +5,7 @@
{reg: [
{bits: 2, name: 'op', type: 3, attr: ['2', 'C2']},
{bits: 5, name: 'shamt[4:0]', type: 1, attr: ['5', 'shamt[4:0]']},
- {bits: 5, name: 'rd/rs1', type: 5, attr: ['5', 'dest≠0']},
+ {bits: 5, name: 'rd/rs1', type: 5, attr: ['5', 'dest != 0']},
{bits: 1, name: 'shamt[5]', type: 5, attr: ['1', 'shamt[5]']},
{bits: 3, name: 'funct3', type: 5, attr: ['3', 'C.SLLI']},
]}
diff --git a/src/images/wavedrom/c-ciw.adoc b/src/images/wavedrom/c-ciw.adoc
index 3e62efe..111b272 100644
--- a/src/images/wavedrom/c-ciw.adoc
+++ b/src/images/wavedrom/c-ciw.adoc
@@ -5,7 +5,7 @@
{reg: [
{bits: 2, name: 'op', type: 3, attr: ['2','C0'],},
{bits: 3, name: 'rd′', type: 5, attr: ['3','dest'],},
- {bits: 8, name: 'imm', type: 5, attr: ['8','uimm[5:4|9:6|2|3]']},
+ {bits: 8, name: 'imm', type: 5, attr: ['8','nzuimm[5:4|9:6|2|3]']},
{bits: 3, name: 'funct3', type: 5, attr: ['3','C.ADDI4SPN']},
], config: {bits: 16}}
....
diff --git a/src/images/wavedrom/c-cs-format-ls.adoc b/src/images/wavedrom/c-cs-format-ls.adoc
index dd9acef..59c5a36 100644
--- a/src/images/wavedrom/c-cs-format-ls.adoc
+++ b/src/images/wavedrom/c-cs-format-ls.adoc
@@ -8,7 +8,7 @@
{bits: 3, name: 'rs2`', type: 3, attr: ['3', 'src','src','src','src','src']},
{bits: 2, name: 'imm', type: 2, attr: ['2', 'offset[2|6]','offset[7:6]','offset[7:6]','offset[2|6]','offset[7:6]']},
{bits: 3, name: 'rs1`', type: 3, attr: ['3', 'base','base','base','base','base']},
- {bits: 3, name: 'imm', types:3, attr: ['3', 'offset[5:3]','offset[5:3]','offset[5|4|8]','offset[5:3]','offset[5:3]']},
+ {bits: 3, name: 'imm', type: 3, attr: ['3', 'offset[5:3]','offset[5:3]','offset[5|4|8]','offset[5:3]','offset[5:3]']},
{bits: 3, name: 'funct3', type: 8, attr: ['3', 'C.SW','C.SD','C.SQ','C.FSW','C.FSD']},
], config: {bits: 16}}
....
diff --git a/src/images/wavedrom/c-int-reg-immed.adoc b/src/images/wavedrom/c-int-reg-immed.adoc
index a804555..45168d7 100644
--- a/src/images/wavedrom/c-int-reg-immed.adoc
+++ b/src/images/wavedrom/c-int-reg-immed.adoc
@@ -4,9 +4,9 @@
....
{reg: [
{bits: 2, name: 'op', type: 3, attr: ['2','C1', 'C1', 'C1']},
- {bits: 5, name: 'imm[4:0]', type: 1, attr: ['5','imm[4:0]', 'imm[4:0], imm[4|6|8:7|5]']},
- {bits: 5, name: 'rd/rs1', type: 5, attr: ['5','dest≠0', 'dest≠0', '2']},
- {bits: 1, name: 'imm[5]', type: 5, attr: ['1','imm[5]', 'imm[5]', 'imm[9]']},
+ {bits: 5, name: 'imm[4:0]', type: 1, attr: ['5','nzimm[4:0]', 'imm[4:0]', 'nzimm[4|6|8:7|5]']},
+ {bits: 5, name: 'rd/rs1', type: 5, attr: ['5','dest != 0', 'dest != 0', '2']},
+ {bits: 1, name: 'imm[5]', type: 5, attr: ['1','nzimm[5]', 'imm[5]', 'nzimm[9]']},
{bits: 3, name: 'funct3', type: 5, attr: ['3','C.ADDI', 'C.ADDIW', 'C.ADDI16SP']},
], config: {bits: 16}}
....
diff --git a/src/images/wavedrom/c-integer-const-gen.adoc b/src/images/wavedrom/c-integer-const-gen.adoc
index 0eaf2d7..732961b 100644
--- a/src/images/wavedrom/c-integer-const-gen.adoc
+++ b/src/images/wavedrom/c-integer-const-gen.adoc
@@ -5,8 +5,8 @@
{reg: [
{bits: 2, name: 'op', type: 3, attr: ['2','C1', 'C1']},
{bits: 5, name: 'imm[4:0]', type: 1, attr: ['5','imm[4:0]','imm[16:12]']},
- {bits: 5, name: 'rd', type: 5, attr: ['5','dest≠0', 'dest≠{0, 2}']},
- {bits: 1, name: 'imm[5]', type: 5, attr: ['1','imm[5]', 'imm[17]'],},
+ {bits: 5, name: 'rd', type: 5, attr: ['5','dest != 0', 'dest != {0, 2}']},
+ {bits: 1, name: 'imm[5]', type: 5, attr: ['1','imm[5]', 'nzimm[17]'],},
{bits: 3, name: 'funct3', type: 5, attr: ['3','C.LI', 'C.LUI'],},
], config: {bits: 16}}
....
diff --git a/src/images/wavedrom/c-mop.adoc b/src/images/wavedrom/c-mop.adoc
new file mode 100644
index 0000000..0aee8e4
--- /dev/null
+++ b/src/images/wavedrom/c-mop.adoc
@@ -0,0 +1,12 @@
+[wavedrom, ,svg]
+....
+{reg:[
+ { bits: 2, name: 0x1, type: 8 },
+ { bits: 5, name: 0x0 },
+ { bits: 1, name: 0x1, type: 4 },
+ { bits: 3, name: 'n[3:1]', type: 4 },
+ { bits: 1, name: 0x0, type: 4 },
+ { bits: 1, name: 0x0 },
+ { bits: 3, name: 0x3 },
+]}
+....
diff --git a/src/images/wavedrom/c-sp-load-store-css.adoc b/src/images/wavedrom/c-sp-load-store-css.adoc
index 8bbe0d9..2cafcd8 100644
--- a/src/images/wavedrom/c-sp-load-store-css.adoc
+++ b/src/images/wavedrom/c-sp-load-store-css.adoc
@@ -3,10 +3,10 @@
[wavedrom, ,svg]
....
{reg: [
- {bits: 1, name: 'op', type: 8, attr: ['2','C2','C2','C2','C2','C2']},
+ {bits: 2, name: 'op', type: 8, attr: ['2','C2','C2','C2','C2','C2']},
{bits: 5, name: 'rs2', type: 4, attr: ['5','src', 'src', 'src', 'src', 'src']},
{bits: 6, name: 'imm', type: 3, attr: ['6','offset[5:2|7:6]', 'offset[5:3|8:6]', 'offset[5:4|9:6]', 'offset[5:2|7:6]','offset[5:3|8:6]']},
- {bits: 4, name: 'funct3', type: 8, attr: ['3','C.SWSP', 'C.SDSP', 'C.SQSP', 'C.FSWSP', 'C.FSDSP']},
+ {bits: 3, name: 'funct3', type: 8, attr: ['3','C.SWSP', 'C.SDSP', 'C.SQSP', 'C.FSWSP', 'C.FSDSP']},
], config: {bits: 16}}
....
diff --git a/src/images/wavedrom/c-srli-srai.adoc b/src/images/wavedrom/c-srli-srai.adoc
index 7f50c37..557bb39 100644
--- a/src/images/wavedrom/c-srli-srai.adoc
+++ b/src/images/wavedrom/c-srli-srai.adoc
@@ -10,6 +10,4 @@
{bits: 1, name: 'shamt[5]', type: 5, attr: ['1','shamt[5]', 'shamt[5]'],},
{bits: 3, name: 'funct3', type: 5, attr: ['3','C.SRLI', 'C.SRAI'],},
]}
-....
-
-
+.... \ No newline at end of file
diff --git a/src/images/wavedrom/csr-instr.adoc b/src/images/wavedrom/csr-instr.adoc
index e4a54a5..93022be 100644
--- a/src/images/wavedrom/csr-instr.adoc
+++ b/src/images/wavedrom/csr-instr.adoc
@@ -5,7 +5,7 @@
....
{reg: [
{bits: 7, name: 'opcode', attr: ['7', 'SYSTEM', 'SYSTEM', 'SYSTEM', 'SYSTEM', 'SYSTEM', 'SYSTEM'], type: 8},
- {bits: 5, name: 'rd', attr: ['3', 'dest', 'dest', 'dest', 'dest', 'dest', 'dest'], type: 2},
+ {bits: 5, name: 'rd', attr: ['5', 'dest', 'dest', 'dest', 'dest', 'dest', 'dest'], type: 2},
{bits: 3, name: 'funct3', attr: ['3', 'CSRRW', 'CSRRS', 'CSRRC', 'CSRRWI', 'CSRRSI', 'CSRRCI'], type: 8},
{bits: 5, name: 'rs1', attr: ['5', 'source', 'source', 'source', 'uimm[4:0]', 'uimm[4:0]', 'uimm[4:0]'], type: 4},
{bits: 12, name: 'csr', attr: ['12', 'source/dest', 'source/dest', 'source/dest', 'source/dest', 'source/dest', 'source/dest'], type: 4},
diff --git a/src/images/wavedrom/ct-conditional.adoc b/src/images/wavedrom/ct-conditional.adoc
index 84ef7c5..b886d7c 100644
--- a/src/images/wavedrom/ct-conditional.adoc
+++ b/src/images/wavedrom/ct-conditional.adoc
@@ -4,11 +4,10 @@
....
{reg: [
{bits: 7, name: 'opcode', attr: ['7', 'BRANCH', 'BRANCH', 'BRANCH'], type: 8},
- {bits: 1, name: '[11]', attr: '1', type: 3},
- {bits: 4, name: 'imm[4:1]', attr: ['4', 'offset[11|4:1]', 'offset[11|4:1]', 'offset[11|4:1]'], type: 3},
+ {bits: 5, name: 'imm[4:1|11]', attr: ['5', 'offset[4:1|11]', 'offset[4:1|11]', 'offset[4:1|11]'], type: 3},
{bits: 3, name: 'funct3', attr: ['3', 'BEQ/BNE', 'BLT[U]', 'BGE[U]'], type: 8},
{bits: 5, name: 'rs1', attr: ['5', 'src1', 'src1', 'src1'], type: 4},
{bits: 5, name: 'rs2', attr: ['5', 'src2','src2', 'src2'], type: 4},
- {bits: 7, name: 'imm[12|10:5]', attr: ['6', 'offset[12|10:5]', 'offset[12|10:5]', 'offset[12|10:5]'], type: 3},
+ {bits: 7, name: 'imm[12|10:5]', attr: ['7', 'offset[12|10:5]', 'offset[12|10:5]', 'offset[12|10:5]'], type: 3},
], config:{fontsize: 10}}
....
diff --git a/src/images/wavedrom/ct-unconditional-2.adoc b/src/images/wavedrom/ct-unconditional-2.adoc
index ef33a9e..4dda824 100644
--- a/src/images/wavedrom/ct-unconditional-2.adoc
+++ b/src/images/wavedrom/ct-unconditional-2.adoc
@@ -4,7 +4,7 @@
....
{reg: [
{bits: 7, name: 'opcode', attr: ['7', 'JALR'], type: 8},
- {bits: 5, name: 'rd', attr: ['6', 'dest'], type: 2},
+ {bits: 5, name: 'rd', attr: ['5', 'dest'], type: 2},
{bits: 3, name: 'funct3', attr: ['3', '0'], type: 8},
{bits: 5, name: 'rs1', attr: ['5', 'base'], type: 4},
{bits: 12, name: 'imm[11:0]', attr: ['12', 'offset[11:0]'], type: 3},
diff --git a/src/images/wavedrom/division-op.adoc b/src/images/wavedrom/division-op.adoc
index 600337d..fabdac1 100644
--- a/src/images/wavedrom/division-op.adoc
+++ b/src/images/wavedrom/division-op.adoc
@@ -3,7 +3,7 @@
[wavedrom, ,svg]
....
{reg: [
- {bits: 7, name: 'opcode', attr: ['7', 'OP', 'OP'], type: 8},
+ {bits: 7, name: 'opcode', attr: ['7', 'OP', 'OP-32'], type: 8},
{bits: 5, name: 'rd', attr: ['5', 'dest', 'dest'], type: 2},
{bits: 3, name: 'funct3', attr: ['3','DIV[U]/REM[U]', 'DIV[U]W/REM[U]W'], type: 8},
{bits: 5, name: 'rs1', attr: ['5', 'dividend', 'dividend'], type: 4},
diff --git a/src/images/wavedrom/load-reserve-st-conditional.adoc b/src/images/wavedrom/load-reserve-st-conditional.adoc
index 67ce56a..355342c 100644
--- a/src/images/wavedrom/load-reserve-st-conditional.adoc
+++ b/src/images/wavedrom/load-reserve-st-conditional.adoc
@@ -10,7 +10,7 @@
{bits: 3, name: 'funct3', attr: ['3', 'width', 'width'], type: 8},
{bits: 5, name: 'rs1', attr: ['5', 'addr', 'addr'], type: 4},
{bits: 5, name: 'rs2', attr: ['5', '0', 'src'], type: 4},
- {bits: 1, name: 'r1', attr: ['1', 'ring', 'ring'], type: 8},
+ {bits: 1, name: 'rl', attr: ['1', 'ring', 'ring'], type: 8},
{bits: 1, name: 'aq', attr: ['1', 'orde', 'orde'], type: 8},
{bits: 5, name: 'funct5', attr: ['5', 'LR.W/D', 'SC.W/D'], type: 8},
]}
diff --git a/src/images/wavedrom/mop-r.adoc b/src/images/wavedrom/mop-r.adoc
new file mode 100644
index 0000000..713b37c
--- /dev/null
+++ b/src/images/wavedrom/mop-r.adoc
@@ -0,0 +1,15 @@
+[wavedrom, ,svg]
+....
+{reg:[
+ { bits: 7, name: 0x73, attr: ['SYSTEM'], type: 8 },
+ { bits: 5, name: 'rd', type: 2 },
+ { bits: 3, name: 0x4 },
+ { bits: 5, name: 'rs1', type: 4 },
+ { bits: 2, name: 'n[1:0]' },
+ { bits: 4, name: 0x7 },
+ { bits: 2, name: 'n[3:2]' },
+ { bits: 2, name: 0x0 },
+ { bits: 1, name: 'n[4]' },
+ { bits: 1, name: 0x1 },
+], config: {fontsize: 11}}
+....
diff --git a/src/images/wavedrom/mop-rr.adoc b/src/images/wavedrom/mop-rr.adoc
new file mode 100644
index 0000000..b70f854
--- /dev/null
+++ b/src/images/wavedrom/mop-rr.adoc
@@ -0,0 +1,15 @@
+[wavedrom, ,svg]
+....
+{reg:[
+ { bits: 7, name: 0x73, attr: ['SYSTEM'], type: 8 },
+ { bits: 5, name: 'rd', type: 2 },
+ { bits: 3, name: 0x4 },
+ { bits: 5, name: 'rs1', type: 4 },
+ { bits: 5, name: 'rs2', type: 4 },
+ { bits: 1, name: 0x1 },
+ { bits: 2, name: 'n[1:0]' },
+ { bits: 2, name: 0x0 },
+ { bits: 1, name: 'n[2]' },
+ { bits: 1, name: 0x1 },
+], config: {fontsize: 11}}
+....
diff --git a/src/images/wavedrom/reg-based-ldnstr.adoc b/src/images/wavedrom/reg-based-ldnstr.adoc
index 57c04c4..24d430b 100644
--- a/src/images/wavedrom/reg-based-ldnstr.adoc
+++ b/src/images/wavedrom/reg-based-ldnstr.adoc
@@ -6,7 +6,7 @@
{reg: [
{bits: 2, name: 'op', attr: ['2', 'C0', 'C0', 'C0', 'C0', 'C0'], type: 8},
{bits: 3, name: 'rd`', attr: ['3', 'dest', 'dest','dest','dest','dest'], type: 3},
- {bits: 2, name: 'imm', attr:['2', 'offest[2|6]', 'offest[7:6]', 'offest[7:6]', 'offest[2|6]', 'offest[7:6]'], type: 2},
+ {bits: 2, name: 'imm', attr:['2', 'offset[2|6]', 'offset[7:6]', 'offset[7:6]', 'offset[2|6]', 'offset[7:6]'], type: 2},
{bits: 3, name: 'rs1`', attr: ['3', 'base', 'base', 'base', 'base', 'base'], type: 2},
{bits: 3, name: 'imm', attr: ['3', 'offset[5:3]', 'offset[5:3]', 'offset[5|4|8]', 'offset[5:3]', 'offset[5:3]'], type: 3},
{bits: 3, name: 'funct3', attr: ['3', 'C.LW', 'C.LD', 'C.LQ', 'C.FLW', 'C.FLD'], type: 8},
diff --git a/src/images/wavedrom/sp-load-store-2.adoc b/src/images/wavedrom/sp-load-store-2.adoc
new file mode 100644
index 0000000..f1025e9
--- /dev/null
+++ b/src/images/wavedrom/sp-load-store-2.adoc
@@ -0,0 +1,24 @@
+//## 12.5 Single-Precision Load and Store Instructions
+
+[wavedrom, ,svg]
+....
+{reg: [
+ {bits: 7, name: 'opcode', attr: ['7', 'LOAD-FP'], type: 8},
+ {bits: 5, name: 'rd', attr: ['5', 'dest'], type: 2},
+ {bits: 3, name: 'width', attr: ['3', 'W'], type: 8},
+ {bits: 5, name: 'rs1', attr: ['5', 'base'], type: 4},
+ {bits: 12, name: 'imm[11:0]', attr: ['12', 'offset[11:0]'], type: 3},
+]}
+....
+
+[wavedrom, ,svg]
+....
+{reg: [
+ {bits: 7, name: 'opcode', attr: ['7', 'STORE-FP'], type: 8},
+ {bits: 5, name: 'imm[4:0]', attr: ['5', 'offset[4:0]'], type: 3},
+ {bits: 3, name: 'width', attr: ['3', 'W'], type: 8},
+ {bits: 5, name: 'rs1', attr: ['5', 'base'], type: 4},
+ {bits: 5, name: 'rs2', attr: ['5', 'src'], type: 4},
+ {bits: 7, name: 'imm[11:5]', attr: ['7', 'offset[11:5]'], type: 3},
+]}
+.... \ No newline at end of file
diff --git a/src/images/wavedrom/v-inst-table.adoc b/src/images/wavedrom/v-inst-table.adoc
new file mode 100644
index 0000000..0c02220
--- /dev/null
+++ b/src/images/wavedrom/v-inst-table.adoc
@@ -0,0 +1,210 @@
+
+// [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"]
+[cols="<,<,<,<,<,<,<,<,<,<,<,<,<",options="header"]
+|===
+5+| Integer 4+| Integer 4+| FP
+
+| funct3 | | | | | funct3 | | | | funct3 | | |
+| OPIVV |V| | | | OPMVV{nbsp} |V| | | OPFVV |V| |
+| OPIVX | |X| | | OPMVX{nbsp} | |X| | OPFVF | |F|
+| OPIVI | | |I| | | | | | | | |
+|===
+
+[cols="<,<,<,<,<,<,<,<,<,<,<,<,<",options="header"]
+|===
+5+| funct6 4+| funct6 4+| funct6
+
+| 000000 |V|X|I| vadd | 000000 |V| | vredsum | 000000 |V|F| vfadd
+| 000001 | | | | | 000001 |V| | vredand | 000001 |V| | vfredusum
+| 000010 |V|X| | vsub | 000010 |V| | vredor | 000010 |V|F| vfsub
+| 000011 | |X|I| vrsub | 000011 |V| | vredxor | 000011 |V| | vfredosum
+| 000100 |V|X| | vminu | 000100 |V| | vredminu | 000100 |V|F| vfmin
+| 000101 |V|X| | vmin | 000101 |V| | vredmin | 000101 |V| | vfredmin
+| 000110 |V|X| | vmaxu | 000110 |V| | vredmaxu | 000110 |V|F| vfmax
+| 000111 |V|X| | vmax | 000111 |V| | vredmax | 000111 |V| | vfredmax
+| 001000 | | | | | 001000 |V|X| vaaddu | 001000 |V|F| vfsgnj
+| 001001 |V|X|I| vand | 001001 |V|X| vaadd | 001001 |V|F| vfsgnjn
+| 001010 |V|X|I| vor | 001010 |V|X| vasubu | 001010 |V|F| vfsgnjx
+| 001011 |V|X|I| vxor | 001011 |V|X| vasub | 001011 | | |
+| 001100 |V|X|I| vrgather | 001100 | | | | 001100 | | |
+| 001101 | | | | | 001101 | | | | 001101 | | |
+| 001110 | |X|I| vslideup | 001110 | |X| vslide1up | 001110 | |F| vfslide1up
+| 001110 |V| | |vrgatherei16| | | | | | | |
+| 001111 | |X|I| vslidedown | 001111 | |X| vslide1down | 001111 | |F| vfslide1down
+|===
+
+// [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"]
+|===
+5+| funct6 4+| funct6 4+| funct6
+
+| 010000 |V|X|I| vadc | 010000 |V| | VWXUNARY0 | 010000 |V| | VWFUNARY0
+| | | | | | 010000 | |X| VRXUNARY0 | 010000 | |F| VRFUNARY0
+| 010001 |V|X|I| vmadc | 010001 | | | | 010001 | | |
+| 010010 |V|X| | vsbc | 010010 |V| | VXUNARY0 | 010010 |V| | VFUNARY0
+| 010011 |V|X| | vmsbc | 010011 | | | | 010011 |V| | VFUNARY1
+| 010100 | | | | | 010100 |V| | VMUNARY0 | 010100 | | |
+| 010101 | | | | | 010101 | | | | 010101 | | |
+| 010110 | | | | | 010110 | | | | 010110 | | |
+| 010111 |V|X|I| vmerge/vmv | 010111 |V| | vcompress | 010111 | |F| vfmerge/vfmv
+| 011000 |V|X|I| vmseq | 011000 |V| | vmandn | 011000 |V|F| vmfeq
+| 011001 |V|X|I| vmsne | 011001 |V| | vmand | 011001 |V|F| vmfle
+| 011010 |V|X| | vmsltu | 011010 |V| | vmor | 011010 | | |
+| 011011 |V|X| | vmslt | 011011 |V| | vmxor | 011011 |V|F| vmflt
+| 011100 |V|X|I| vmsleu | 011100 |V| | vmorn | 011100 |V|F| vmfne
+| 011101 |V|X|I| vmsle | 011101 |V| | vmnand | 011101 | |F| vmfgt
+| 011110 | |X|I| vmsgtu | 011110 |V| | vmnor | 011110 | | |
+| 011111 | |X|I| vmsgt | 011111 |V| | vmxnor | 011111 | |F| vmfge
+|===
+
+// [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"]
+|===
+5+| funct6 4+| funct6 4+| funct6
+
+| 100000 |V|X|I| vsaddu | 100000 |V|X| vdivu | 100000 |V|F| vfdiv
+| 100001 |V|X|I| vsadd | 100001 |V|X| vdiv | 100001 | |F| vfrdiv
+| 100010 |V|X| | vssubu | 100010 |V|X| vremu | 100010 | | |
+| 100011 |V|X| | vssub | 100011 |V|X| vrem | 100011 | | |
+| 100100 | | | | | 100100 |V|X| vmulhu | 100100 |V|F| vfmul
+| 100101 |V|X|I| vsll | 100101 |V|X| vmul | 100101 | | |
+| 100110 | | | | | 100110 |V|X| vmulhsu | 100110 | | |
+| 100111 |V|X| | vsmul | 100111 |V|X| vmulh | 100111 | |F| vfrsub
+| 100111 | | |I| vmv<nr>r | | | | | | | |
+| 101000 |V|X|I| vsrl | 101000 | | | | 101000 |V|F| vfmadd
+| 101001 |V|X|I| vsra | 101001 |V|X| vmadd | 101001 |V|F| vfnmadd
+| 101010 |V|X|I| vssrl | 101010 | | | | 101010 |V|F| vfmsub
+| 101011 |V|X|I| vssra | 101011 |V|X| vnmsub | 101011 |V|F| vfnmsub
+| 101100 |V|X|I| vnsrl | 101100 | | | | 101100 |V|F| vfmacc
+| 101101 |V|X|I| vnsra | 101101 |V|X| vmacc | 101101 |V|F| vfnmacc
+| 101110 |V|X|I| vnclipu | 101110 | | | | 101110 |V|F| vfmsac
+| 101111 |V|X|I| vnclip | 101111 |V|X| vnmsac | 101111 |V|F| vfnmsac
+|===
+
+// [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"]
+|===
+5+| funct6 4+| funct6 4+| funct6
+
+| 110000 |V| | | vwredsumu | 110000 |V|X| vwaddu | 110000 |V|F| vfwadd
+| 110001 |V| | | vwredsum | 110001 |V|X| vwadd | 110001 |V| | vfwredusum
+| 110010 | | | | | 110010 |V|X| vwsubu | 110010 |V|F| vfwsub
+| 110011 | | | | | 110011 |V|X| vwsub | 110011 |V| | vfwredosum
+| 110100 | | | | | 110100 |V|X| vwaddu.w | 110100 |V|F| vfwadd.w
+| 110101 | | | | | 110101 |V|X| vwadd.w | 110101 | | |
+| 110110 | | | | | 110110 |V|X| vwsubu.w | 110110 |V|F| vfwsub.w
+| 110111 | | | | | 110111 |V|X| vwsub.w | 110111 | | |
+| 111000 | | | | | 111000 |V|X| vwmulu | 111000 |V|F| vfwmul
+| 111001 | | | | | 111001 | | | | 111001 | | |
+| 111010 | | | | | 111010 |V|X| vwmulsu | 111010 | | |
+| 111011 | | | | | 111011 |V|X| vwmul | 111011 | | |
+| 111100 | | | | | 111100 |V|X| vwmaccu | 111100 |V|F| vfwmacc
+| 111101 | | | | | 111101 |V|X| vwmacc | 111101 |V|F| vfwnmacc
+| 111110 | | | | | 111110 | |X| vwmaccus | 111110 |V|F| vfwmsac
+| 111111 | | | | | 111111 |V|X| vwmaccsu | 111111 |V|F| vfwnmsac
+|===
+
+<<<
+
+.VRXUNARY0 encoding space
+[cols="2,14"]
+|===
+| vs2 |
+
+| 00000 | vmv.s.x
+|===
+
+.VWXUNARY0 encoding space
+[cols="2,14"]
+|===
+| vs1 |
+
+| 00000 | vmv.x.s
+| 10000 | vcpop
+| 10001 | vfirst
+|===
+
+.VXUNARY0 encoding space
+[cols="2,14"]
+|===
+| vs1 |
+
+| 00010 | vzext.vf8
+| 00011 | vsext.vf8
+| 00100 | vzext.vf4
+| 00101 | vsext.vf4
+| 00110 | vzext.vf2
+| 00111 | vsext.vf2
+|===
+
+.VRFUNARY0 encoding space
+[cols="2,14"]
+|===
+| vs2 |
+
+| 00000 | vfmv.s.f
+|===
+
+.VWFUNARY0 encoding space
+[cols="2,14"]
+|===
+| vs1 |
+
+| 00000 | vfmv.f.s
+|===
+
+.VFUNARY0 encoding space
+[cols="2,14"]
+|===
+| vs1 | name
+
+2+| single-width converts
+| 00000 | vfcvt.xu.f.v
+| 00001 | vfcvt.x.f.v
+| 00010 | vfcvt.f.xu.v
+| 00011 | vfcvt.f.x.v
+| 00110 | vfcvt.rtz.xu.f.v
+| 00111 | vfcvt.rtz.x.f.v
+| |
+2+| widening converts
+| 01000 | vfwcvt.xu.f.v
+| 01001 | vfwcvt.x.f.v
+| 01010 | vfwcvt.f.xu.v
+| 01011 | vfwcvt.f.x.v
+| 01100 | vfwcvt.f.f.v
+| 01110 | vfwcvt.rtz.xu.f.v
+| 01111 | vfwcvt.rtz.x.f.v
+| |
+2+| narrowing converts
+| 10000 | vfncvt.xu.f.w
+| 10001 | vfncvt.x.f.w
+| 10010 | vfncvt.f.xu.w
+| 10011 | vfncvt.f.x.w
+| 10100 | vfncvt.f.f.w
+| 10101 | vfncvt.rod.f.f.w
+| 10110 | vfncvt.rtz.xu.f.w
+| 10111 | vfncvt.rtz.x.f.w
+|===
+
+.VFUNARY1 encoding space
+[cols="2,14"]
+|===
+| vs1 | name
+
+| 00000 | vfsqrt.v
+| 00100 | vfrsqrt7.v
+| 00101 | vfrec7.v
+| 10000 | vfclass.v
+|===
+
+
+.VMUNARY0 encoding space
+[cols="2,14"]
+|===
+| vs1 |
+
+| 00001 | vmsbf
+| 00010 | vmsof
+| 00011 | vmsif
+| 10000 | viota
+| 10001 | vid
+|===
+
+
diff --git a/src/images/wavedrom/valu-format.adoc b/src/images/wavedrom/valu-format.adoc
new file mode 100644
index 0000000..cdd3447
--- /dev/null
+++ b/src/images/wavedrom/valu-format.adoc
@@ -0,0 +1,104 @@
+Formats for Vector Arithmetic Instructions under OP-V major opcode
+
+////
+31 26 25 24 20 19 15 14 12 11 7 6 0
+ funct6 | vm | vs2 | vs1 | 0 0 0 | vd |1010111| OP-V (OPIVV)
+ funct6 | vm | vs2 | vs1 | 0 0 1 | vd/rd |1010111| OP-V (OPFVV)
+ funct6 | vm | vs2 | vs1 | 0 1 0 | vd/rd |1010111| OP-V (OPMVV)
+ funct6 | vm | vs2 | imm[4:0] | 0 1 1 | vd |1010111| OP-V (OPIVI)
+ funct6 | vm | vs2 | rs1 | 1 0 0 | vd |1010111| OP-V (OPIVX)
+ funct6 | vm | vs2 | rs1 | 1 0 1 | vd |1010111| OP-V (OPFVF)
+ funct6 | vm | vs2 | rs1 | 1 1 0 | vd/rd |1010111| OP-V (OPMVX)
+ 6 1 5 5 3 5 7
+////
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x57, attr: 'OPIVV'},
+ {bits: 5, name: 'vd', type: 2},
+ {bits: 3, name: 0},
+ {bits: 5, name: 'vs1', type: 2},
+ {bits: 5, name: 'vs2', type: 2},
+ {bits: 1, name: 'vm'},
+ {bits: 6, name: 'funct6'},
+]}
+....
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x57, attr: 'OPFVV'},
+ {bits: 5, name: 'vd / rd', type: 7},
+ {bits: 3, name: 1},
+ {bits: 5, name: 'vs1', type: 2},
+ {bits: 5, name: 'vs2', type: 2},
+ {bits: 1, name: 'vm'},
+ {bits: 6, name: 'funct6'},
+]}
+....
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x57, attr: 'OPMVV'},
+ {bits: 5, name: 'vd / rd', type: 7},
+ {bits: 3, name: 2},
+ {bits: 5, name: 'vs1', type: 2},
+ {bits: 5, name: 'vs2', type: 2},
+ {bits: 1, name: 'vm'},
+ {bits: 6, name: 'funct6'},
+]}
+....
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x57, attr: ['OPIVI']},
+ {bits: 5, name: 'vd', type: 2},
+ {bits: 3, name: 3},
+ {bits: 5, name: 'imm[4:0]', type: 5},
+ {bits: 5, name: 'vs2', type: 2},
+ {bits: 1, name: 'vm'},
+ {bits: 6, name: 'funct6'},
+]}
+....
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x57, attr: 'OPIVX'},
+ {bits: 5, name: 'vd', type: 2},
+ {bits: 3, name: 4},
+ {bits: 5, name: 'rs1', type: 4},
+ {bits: 5, name: 'vs2', type: 2},
+ {bits: 1, name: 'vm'},
+ {bits: 6, name: 'funct6'},
+]}
+....
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x57, attr: 'OPFVF'},
+ {bits: 5, name: 'vd', type: 2},
+ {bits: 3, name: 5},
+ {bits: 5, name: 'rs1', type: 4},
+ {bits: 5, name: 'vs2', type: 2},
+ {bits: 1, name: 'vm'},
+ {bits: 6, name: 'funct6'},
+]}
+....
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x57, attr: 'OPMVX'},
+ {bits: 5, name: 'vd / rd', type: 7},
+ {bits: 3, name: 6},
+ {bits: 5, name: 'rs1', type: 4},
+ {bits: 5, name: 'vs2', type: 2},
+ {bits: 1, name: 'vm'},
+ {bits: 6, name: 'funct6'},
+]}
+....
diff --git a/src/images/wavedrom/vcfg-format.adoc b/src/images/wavedrom/vcfg-format.adoc
new file mode 100644
index 0000000..ac0353c
--- /dev/null
+++ b/src/images/wavedrom/vcfg-format.adoc
@@ -0,0 +1,47 @@
+Formats for Vector Configuration Instructions under OP-V major opcode
+
+////
+ 31 30 25 24 20 19 15 14 12 11 7 6 0
+ 0 | zimm[10:0] | rs1 | 1 1 1 | rd |1010111| vsetvli
+ 1 | 1| zimm[ 9:0] | uimm[4:0]| 1 1 1 | rd |1010111| vsetivli
+ 1 | 000000 | rs2 | rs1 | 1 1 1 | rd |1010111| vsetvl
+ 1 6 5 5 3 5 7
+////
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x57, attr: 'vsetvli'},
+ {bits: 5, name: 'rd', type: 4},
+ {bits: 3, name: 7},
+ {bits: 5, name: 'rs1', type: 4},
+ {bits: 11, name: 'vtypei[10:0]', type: 5},
+ {bits: 1, name: '0'},
+]}
+....
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x57, attr: 'vsetivli'},
+ {bits: 5, name: 'rd', type: 4},
+ {bits: 3, name: 7},
+ {bits: 5, name: 'uimm[4:0]', type: 5},
+ {bits: 10, name: 'vtypei[9:0]', type: 5},
+ {bits: 1, name: '1'},
+ {bits: 1, name: '1'},
+]}
+....
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x57, attr: 'vsetvl'},
+ {bits: 5, name: 'rd', type: 4},
+ {bits: 3, name: 7},
+ {bits: 5, name: 'rs1', type: 4},
+ {bits: 5, name: 'rs2', type: 4},
+ {bits: 6, name: 0x00},
+ {bits: 1, name: 1},
+]}
+....
diff --git a/src/images/wavedrom/vfrec7.adoc b/src/images/wavedrom/vfrec7.adoc
new file mode 100644
index 0000000..d33f44e
--- /dev/null
+++ b/src/images/wavedrom/vfrec7.adoc
@@ -0,0 +1,136 @@
+.vfrec7.v common-case lookup table contents
+[%autowidth,float="center",align="center",options="header"]
+|===
+
+| sig[MSB -: 7] | sig_out[MSB -: 7]
+
+| 0 | 127
+| 1 | 125
+| 2 | 123
+| 3 | 121
+| 4 | 119
+| 5 | 117
+| 6 | 116
+| 7 | 114
+| 8 | 112
+| 9 | 110
+| 10 | 109
+| 11 | 107
+| 12 | 105
+| 13 | 104
+| 14 | 102
+| 15 | 100
+| 16 | 99
+| 17 | 97
+| 18 | 96
+| 19 | 94
+| 20 | 93
+| 21 | 91
+| 22 | 90
+| 23 | 88
+| 24 | 87
+| 25 | 85
+| 26 | 84
+| 27 | 83
+| 28 | 81
+| 29 | 80
+| 30 | 79
+| 31 | 77
+| 32 | 76
+| 33 | 75
+| 34 | 74
+| 35 | 72
+| 36 | 71
+| 37 | 70
+| 38 | 69
+| 39 | 68
+| 40 | 66
+| 41 | 65
+| 42 | 64
+| 43 | 63
+| 44 | 62
+| 45 | 61
+| 46 | 60
+| 47 | 59
+| 48 | 58
+| 49 | 57
+| 50 | 56
+| 51 | 55
+| 52 | 54
+| 53 | 53
+| 54 | 52
+| 55 | 51
+| 56 | 50
+| 57 | 49
+| 58 | 48
+| 59 | 47
+| 60 | 46
+| 61 | 45
+| 62 | 44
+| 63 | 43
+| 64 | 42
+| 65 | 41
+| 66 | 40
+| 67 | 40
+| 68 | 39
+| 69 | 38
+| 70 | 37
+| 71 | 36
+| 72 | 35
+| 73 | 35
+| 74 | 34
+| 75 | 33
+| 76 | 32
+| 77 | 31
+| 78 | 31
+| 79 | 30
+| 80 | 29
+| 81 | 28
+| 82 | 28
+| 83 | 27
+| 84 | 26
+| 85 | 25
+| 86 | 25
+| 87 | 24
+| 88 | 23
+| 89 | 23
+| 90 | 22
+| 91 | 21
+| 92 | 21
+| 93 | 20
+| 94 | 19
+| 95 | 19
+| 96 | 18
+| 97 | 17
+| 98 | 17
+| 99 | 16
+| 100 | 15
+| 101 | 15
+| 102 | 14
+| 103 | 14
+| 104 | 13
+| 105 | 12
+| 106 | 12
+| 107 | 11
+| 108 | 11
+| 109 | 10
+| 110 | 9
+| 111 | 9
+| 112 | 8
+| 113 | 8
+| 114 | 7
+| 115 | 7
+| 116 | 6
+| 117 | 5
+| 118 | 5
+| 119 | 4
+| 120 | 4
+| 121 | 3
+| 122 | 3
+| 123 | 2
+| 124 | 2
+| 125 | 1
+| 126 | 1
+| 127 | 0
+
+|===
diff --git a/src/images/wavedrom/vfrsqrt7.adoc b/src/images/wavedrom/vfrsqrt7.adoc
new file mode 100644
index 0000000..8ebc621
--- /dev/null
+++ b/src/images/wavedrom/vfrsqrt7.adoc
@@ -0,0 +1,137 @@
+.vfrsqrt7.v common-case lookup table contents
+[%autowidth,float=center,align=center,options="header"]
+|===
+
+|exp[0] | sig[MSB -: 6] | sig_out[MSB -: 7]
+
+| 0| 0 | 52
+| 0| 1 | 51
+| 0| 2 | 50
+| 0| 3 | 48
+| 0| 4 | 47
+| 0| 5 | 46
+| 0| 6 | 44
+| 0| 7 | 43
+| 0| 8 | 42
+| 0| 9 | 41
+| 0| 10 | 40
+| 0| 11 | 39
+| 0| 12 | 38
+| 0| 13 | 36
+| 0| 14 | 35
+| 0| 15 | 34
+| 0| 16 | 33
+| 0| 17 | 32
+| 0| 18 | 31
+| 0| 19 | 30
+| 0| 20 | 30
+| 0| 21 | 29
+| 0| 22 | 28
+| 0| 23 | 27
+| 0| 24 | 26
+| 0| 25 | 25
+| 0| 26 | 24
+| 0| 27 | 23
+| 0| 28 | 23
+| 0| 29 | 22
+| 0| 30 | 21
+| 0| 31 | 20
+| 0| 32 | 19
+| 0| 33 | 19
+| 0| 34 | 18
+| 0| 35 | 17
+| 0| 36 | 16
+| 0| 37 | 16
+| 0| 38 | 15
+| 0| 39 | 14
+| 0| 40 | 14
+| 0| 41 | 13
+| 0| 42 | 12
+| 0| 43 | 12
+| 0| 44 | 11
+| 0| 45 | 10
+| 0| 46 | 10
+| 0| 47 | 9
+| 0| 48 | 9
+| 0| 49 | 8
+| 0| 50 | 7
+| 0| 51 | 7
+| 0| 52 | 6
+| 0| 53 | 6
+| 0| 54 | 5
+| 0| 55 | 4
+| 0| 56 | 4
+| 0| 57 | 3
+| 0| 58 | 3
+| 0| 59 | 2
+| 0| 60 | 2
+| 0| 61 | 1
+| 0| 62 | 1
+| 0| 63 | 0
+
+| 1| 0 | 127
+| 1| 1 | 125
+| 1| 2 | 123
+| 1| 3 | 121
+| 1| 4 | 119
+| 1| 5 | 118
+| 1| 6 | 116
+| 1| 7 | 114
+| 1| 8 | 113
+| 1| 9 | 111
+| 1| 10 | 109
+| 1| 11 | 108
+| 1| 12 | 106
+| 1| 13 | 105
+| 1| 14 | 103
+| 1| 15 | 102
+| 1| 16 | 100
+| 1| 17 | 99
+| 1| 18 | 97
+| 1| 19 | 96
+| 1| 20 | 95
+| 1| 21 | 93
+| 1| 22 | 92
+| 1| 23 | 91
+| 1| 24 | 90
+| 1| 25 | 88
+| 1| 26 | 87
+| 1| 27 | 86
+| 1| 28 | 85
+| 1| 29 | 84
+| 1| 30 | 83
+| 1| 31 | 82
+| 1| 32 | 80
+| 1| 33 | 79
+| 1| 34 | 78
+| 1| 35 | 77
+| 1| 36 | 76
+| 1| 37 | 75
+| 1| 38 | 74
+| 1| 39 | 73
+| 1| 40 | 72
+| 1| 41 | 71
+| 1| 42 | 70
+| 1| 43 | 70
+| 1| 44 | 69
+| 1| 45 | 68
+| 1| 46 | 67
+| 1| 47 | 66
+| 1| 48 | 65
+| 1| 49 | 64
+| 1| 50 | 63
+| 1| 51 | 63
+| 1| 52 | 62
+| 1| 53 | 61
+| 1| 54 | 60
+| 1| 55 | 59
+| 1| 56 | 59
+| 1| 57 | 58
+| 1| 58 | 57
+| 1| 59 | 56
+| 1| 60 | 56
+| 1| 61 | 55
+| 1| 62 | 54
+| 1| 63 | 53
+
+|===
\ No newline at end of file
diff --git a/src/images/wavedrom/vmem-format.adoc b/src/images/wavedrom/vmem-format.adoc
new file mode 100644
index 0000000..f9b25ee
--- /dev/null
+++ b/src/images/wavedrom/vmem-format.adoc
@@ -0,0 +1,108 @@
+Format for Vector Load Instructions under LOAD-FP major opcode
+
+////
+31 29 28 27 26 25 24 20 19 15 14 12 11 7 6 0
+ nf | mew| mop | vm | lumop | rs1 | width | vd |0000111| VL* unit-stride
+ nf | mew| mop | vm | rs2 | rs1 | width | vd |0000111| VLS* strided
+ nf | mew| mop | vm | vs2 | rs1 | width | vd |0000111| VLX* indexed
+ 3 1 2 1 5 5 3 5 7
+////
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x7, attr: 'VL* unit-stride'},
+ {bits: 5, name: 'vd', attr: 'destination of load', type: 2},
+ {bits: 3, name: 'width'},
+ {bits: 5, name: 'rs1', attr: 'base address', type: 4},
+ {bits: 5, name: 'lumop'},
+ {bits: 1, name: 'vm'},
+ {bits: 2, name: 'mop'},
+ {bits: 1, name: 'mew'},
+ {bits: 3, name: 'nf'},
+]}
+....
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x7, attr: 'VLS* strided'},
+ {bits: 5, name: 'vd', attr: 'destination of load', type: 2},
+ {bits: 3, name: 'width'},
+ {bits: 5, name: 'rs1', attr: 'base address', type: 4},
+ {bits: 5, name: 'rs2', attr: 'stride', type: 4},
+ {bits: 1, name: 'vm'},
+ {bits: 2, name: 'mop'},
+ {bits: 1, name: 'mew'},
+ {bits: 3, name: 'nf'},
+]}
+....
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x7, attr: 'VLX* indexed'},
+ {bits: 5, name: 'vd', attr: 'destination of load', type: 2},
+ {bits: 3, name: 'width'},
+ {bits: 5, name: 'rs1', attr: 'base address', type: 4},
+ {bits: 5, name: 'vs2', attr: 'address offsets', type: 2},
+ {bits: 1, name: 'vm'},
+ {bits: 2, name: 'mop'},
+ {bits: 1, name: 'mew'},
+ {bits: 3, name: 'nf'},
+]}
+....
+Format for Vector Store Instructions under STORE-FP major opcode
+
+////
+31 29 28 27 26 25 24 20 19 15 14 12 11 7 6 0
+ nf | mew| mop | vm | sumop | rs1 | width | vs3 |0100111| VS* unit-stride
+ nf | mew| mop | vm | rs2 | rs1 | width | vs3 |0100111| VSS* strided
+ nf | mew| mop | vm | vs2 | rs1 | width | vs3 |0100111| VSX* indexed
+ 3 1 2 1 5 5 3 5 7
+////
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x27, attr: 'VS* unit-stride'},
+ {bits: 5, name: 'vs3', attr: 'store data', type: 2},
+ {bits: 3, name: 'width'},
+ {bits: 5, name: 'rs1', attr: 'base address', type: 4},
+ {bits: 5, name: 'sumop'},
+ {bits: 1, name: 'vm'},
+ {bits: 2, name: 'mop'},
+ {bits: 1, name: 'mew'},
+ {bits: 3, name: 'nf'},
+]}
+....
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x27, attr: 'VSS* strided'},
+ {bits: 5, name: 'vs3', attr: 'store data', type: 2},
+ {bits: 3, name: 'width'},
+ {bits: 5, name: 'rs1', attr: 'base address', type: 4},
+ {bits: 5, name: 'rs2', attr: 'stride', type: 4},
+ {bits: 1, name: 'vm'},
+ {bits: 2, name: 'mop'},
+ {bits: 1, name: 'mew'},
+ {bits: 3, name: 'nf'},
+]}
+....
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x27, attr: 'VSX* indexed'},
+ {bits: 5, name: 'vs3', attr: 'store data', type: 2},
+ {bits: 3, name: 'width'},
+ {bits: 5, name: 'rs1', attr: 'base address', type: 4},
+ {bits: 5, name: 'vs2', attr: 'address offsets', type: 2},
+ {bits: 1, name: 'vm'},
+ {bits: 2, name: 'mop'},
+ {bits: 1, name: 'mew'},
+ {bits: 3, name: 'nf'},
+]}
+....
diff --git a/src/images/wavedrom/vtype-format.adoc b/src/images/wavedrom/vtype-format.adoc
new file mode 100644
index 0000000..9e6ab34
--- /dev/null
+++ b/src/images/wavedrom/vtype-format.adoc
@@ -0,0 +1,28 @@
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 3, name: 'vlmul[2:0]'},
+ {bits: 3, name: 'vsew[2:0]'},
+ {bits: 1, name: 'vta'},
+ {bits: 1, name: 'vma'},
+ {bits: 23, name: 'reserved'},
+ {bits: 1, name: 'vill'},
+]}
+....
+
+NOTE: This diagram shows the layout for RV32 systems, whereas in
+general `vill` should be at bit XLEN-1.
+
+.`vtype` register layout
+[cols=">2,4,10"]
+[%autowidth,float="center",align="center",options="header"]
+|===
+| Bits | Name | Description
+
+| XLEN-1 | vill | Illegal value if set
+| XLEN-2:8 | 0 | Reserved if non-zero
+| 7 | vma | Vector mask agnostic
+| 6 | vta | Vector tail agnostic
+| 5:3 | vsew[2:0] | Selected element width (SEW) setting
+| 2:0 | vlmul[2:0] | Vector register group multiplier (LMUL) setting
+|===
diff --git a/src/intro.adoc b/src/intro.adoc
index d251964..78d7a34 100644
--- a/src/intro.adoc
+++ b/src/intro.adoc
@@ -154,7 +154,7 @@ as more abstract EEIs provide greater portability across different
hardware platforms. Often EEIs are layered on top of one another, where
one higher-level EEI uses another lower-level EEI.
====
-(((hart, exectution environment)))
+(((hart, execution environment)))
From the perspective of software running in a given execution
environment, a hart is a resource that autonomously fetches and executes
RISC-V instructions within that execution environment. In this respect,
@@ -195,7 +195,7 @@ environment but must do so in a way that guest harts operate like
independent hardware threads. In particular, if there are more guest
harts than host harts then the execution environment must be able to
preempt the guest harts and must not wait indefinitely for guest
-software on a guest hart to “yield" control of the guest hart.
+software on a guest hart to "yield" control of the guest hart.
====
=== RISC-V ISA Overview
@@ -221,9 +221,9 @@ integer variants, RV32I and RV64I, described in
<<rv32>> and <<rv64>>, which provide 32-bit
or 64-bit address spaces respectively. We use the term XLEN to refer to
the width of an integer register in bits (either 32 or 64).
-<<rv32e, Chapter 6>> describes the RV32E subset variant of the
-RV32I base instruction set, which has been added to support small
-microcontrollers, and which has half the number of integer registers.
+<<rv32e, Chapter 6>> describes the RV32E and RV64E subset variants of the
+RV32I and RV64I base instruction sets, respectively, which have been added to support small
+microcontrollers, and which have half the number of integer registers.
<<rv128, Chapter 8>> sketches a future RV128I variant of the
base integer instruction set supporting a flat 128-bit address space
(XLEN=128). The base integer instruction sets use a two's-complement
@@ -258,8 +258,8 @@ reserved for instructions only required by wider address-space variants.
The main disadvantage of not treating the design as a single ISA is that
it complicates the hardware needed to emulate one base ISA on another
-(e.g., RV32I on RV64I). However, differences in addressing and illegal
-instruction traps generally mean some mode switch would be required in
+(e.g., RV32I on RV64I). However, differences in addressing and
+illegal-instruction traps generally mean some mode switch would be required in
hardware in any case even with full superset instruction encodings, and
the different RISC-V base ISAs are similar enough that supporting
multiple versions is relatively low cost. Although some have proposed
@@ -525,13 +525,13 @@ We also wanted to optionally allow longer instructions to support
experimentation and larger instruction-set extensions. Although our
encoding convention required a tighter encoding of the core RISC-V ISA,
this has several beneficial effects.
-(((IMAFED)))
+(((IMAFD)))
An implementation of the standard IMAFD ISA need only hold the
most-significant 30 bits in instruction caches (a 6.25% saving). On
instruction cache refills, any instructions encountered with either low
bit clear should be recoded into illegal 30-bit instructions before
-storing in the cache to preserve illegal instruction exception behavior.
+storing in the cache to preserve illegal-instruction exception behavior.
Perhaps more importantly, by condensing our base ISA into a subset of
the 32-bit instruction word, we leave more space available for
@@ -572,7 +572,7 @@ standard binary library used by many different machines). Defining a
32-bit word of all ones as illegal was also considered, as all machines
must support a 32-bit instruction size, but this requires the
instruction-fetch unit on machines with ILEN >32 report an
-illegal instruction exception rather than an access-fault exception when
+illegal-instruction exception rather than an access-fault exception when
such an instruction borders a protection boundary, complicating
variable-instruction-length fetch and decode.
====
@@ -737,4 +737,4 @@ to further constrain cases that the base architecture defines as UNSPECIFIED.
Like the base architecture, extensions should fully describe allowable
behavior and values and use the term UNSPECIFIED for cases that are intentionally
unconstrained. These cases may be constrained or defined by other
-extensions, platform standards, or implementations.
\ No newline at end of file
+extensions, platform standards, or implementations.
diff --git a/src/latex/figs/PLIC-block-diagram.pdf b/src/latex/figs/PLIC-block-diagram.pdf
deleted file mode 100644
index c6ece1a..0000000
--- a/src/latex/figs/PLIC-block-diagram.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/PLIC-interrupt-flow.pdf b/src/latex/figs/PLIC-interrupt-flow.pdf
deleted file mode 100644
index 7ba4117..0000000
--- a/src/latex/figs/PLIC-interrupt-flow.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/halimps.pdf b/src/latex/figs/halimps.pdf
deleted file mode 100644
index 2cc5a58..0000000
--- a/src/latex/figs/halimps.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/halmode.pdf b/src/latex/figs/halmode.pdf
deleted file mode 100644
index afa441b..0000000
--- a/src/latex/figs/halmode.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/litmus_addrpo.pdf b/src/latex/figs/litmus_addrpo.pdf
deleted file mode 100644
index ad15fb3..0000000
--- a/src/latex/figs/litmus_addrpo.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/litmus_addrpo.pdf_t b/src/latex/figs/litmus_addrpo.pdf_t
deleted file mode 100644
index 2dcc621..0000000
--- a/src/latex/figs/litmus_addrpo.pdf_t
+++ /dev/null
@@ -1,49 +0,0 @@
-\begin{picture}(0,0)%
-\includegraphics{figs/litmus_addrpo.pdf}%
-\end{picture}%
-%
-% Generated by graphviz version 2.36.0 (20140111.2315)
-% Title: G
-% Pages: 1
-%
-\setlength{\unitlength}{3947sp}%
-%
-\begingroup\makeatletter\ifx\SetFigFont\undefined%
-\gdef\SetFigFont#1#2#3#4#5{%
- \reset@font\fontsize{#1}{#2pt}%
- \fontfamily{#3}\fontseries{#4}\fontshape{#5}%
- \selectfont}%
-\fi\endgroup%
-\begin{picture}(3628,1984)(-14,-1133)
-% eiid0
-\put(821,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}a: Ry=1}%
-}}}}
-% eiid1
-\put(821,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}c: Wx=t}%
-}}}}
-% eiid2
-\put(2981,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}d: Rx=t}%
-}}}}
-% eiid3
-\put(2981,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}e: Rt=$v$}%
-}}}}
-% eiid4
-\put(2981,-1021){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}f: Wy=1}%
-}}}}
-\put( 1, 240){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.000,0.392,0.000}fence}%
-}}}}
-\put(461, 240){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(1742,284){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(2561, 250){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}addr}%
-}}}}
-\put(3011, 250){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(3185,-400){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(2731,-529){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}po}%
-}}}}
-\put(1743,20){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\end{picture}%
diff --git a/src/latex/figs/litmus_datacoirfi.pdf b/src/latex/figs/litmus_datacoirfi.pdf
deleted file mode 100644
index 6497bc0..0000000
--- a/src/latex/figs/litmus_datacoirfi.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/litmus_datacoirfi.pdf_t b/src/latex/figs/litmus_datacoirfi.pdf_t
deleted file mode 100644
index 24655ea..0000000
--- a/src/latex/figs/litmus_datacoirfi.pdf_t
+++ /dev/null
@@ -1,61 +0,0 @@
-\begin{picture}(0,0)%
-\includegraphics{figs/litmus_datacoirfi.pdf}%
-\end{picture}%
-%
-% Generated by graphviz version 2.36.0 (20140111.2315)
-% Title: G
-% Pages: 1
-%
-\setlength{\unitlength}{3947sp}%
-%
-\begingroup\makeatletter\ifx\SetFigFont\undefined%
-\gdef\SetFigFont#1#2#3#4#5{%
- \reset@font\fontsize{#1}{#2pt}%
- \fontfamily{#3}\fontseries{#4}\fontshape{#5}%
- \selectfont}%
-\fi\endgroup%
-\begin{picture}(3447,3604)(-14,-2753)
-% eiid0
-\put(821,-616){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}a: Wx=1}%
-}}}}
-% eiid1
-\put(821,-1426){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}c: Wy=1}%
-}}}}
-% eiid2
-\put(2981,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}d: Ry=1}%
-}}}}
-% eiid3
-\put(2981,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}e: Wz=1}%
-}}}}
-% eiid4
-\put(2981,-1021){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}f: Wz=1}%
-}}}}
-% eiid5
-\put(2981,-1831){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}g: Rz=1}%
-}}}}
-% eiid6
-\put(2981,-2641){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}h: Rx=0}%
-}}}}
-\put( 1,-990){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.000,0.392,0.000}fence}%
-}}}}
-\put(460,-990){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(1739,-325){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(2601,225){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}data}%
-}}}}
-\put(3031,225){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(2741,-485){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,1}co}%
-}}}}
-\put(3031,-485){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(2821,-1339){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(2561,-2105){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}addr}%
-}}}}
-\put(3031,-2105){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(1743,-1340){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1.000,0.627,0.251}fr}%
-}}}}
-\end{picture}%
diff --git a/src/latex/figs/litmus_datarfi.pdf b/src/latex/figs/litmus_datarfi.pdf
deleted file mode 100644
index 5cb25c3..0000000
--- a/src/latex/figs/litmus_datarfi.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/litmus_datarfi.pdf_t b/src/latex/figs/litmus_datarfi.pdf_t
deleted file mode 100644
index 7120e66..0000000
--- a/src/latex/figs/litmus_datarfi.pdf_t
+++ /dev/null
@@ -1,56 +0,0 @@
-\begin{picture}(0,0)%
-\includegraphics{figs/litmus_datarfi.pdf}%
-\end{picture}%
-%
-% Generated by graphviz version 2.36.0 (20140111.2315)
-% Title: G
-% Pages: 1
-%
-\setlength{\unitlength}{3947sp}%
-%
-\begingroup\makeatletter\ifx\SetFigFont\undefined%
-\gdef\SetFigFont#1#2#3#4#5{%
- \reset@font\fontsize{#1}{#2pt}%
- \fontfamily{#3}\fontseries{#4}\fontshape{#5}%
- \selectfont}%
-\fi\endgroup%
-\begin{picture}(3886,2794)(-14,-1943)
-% eiid0
-\put(821,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}a: Wx=1}%
-}}}}
-% eiid1
-\put(821,-1021){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}c: Wy=1}%
-}}}}
-% eiid2
-\put(2981,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}d: Ry=1}%
-}}}}
-% eiid3
-\put(2981,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}e: Wz=1}%
-}}}}
-% eiid4
-\put(2981,-1021){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}f: Rz=1}%
-}}}}
-% eiid5
-\put(2981,-1831){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}g: Rx=0}%
-}}}}
-\put( 1,-575){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.000,0.392,0.000}fence}%
-}}}}
-\put(460,-575){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(1739,-122){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(2581,250){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}data}%
-}}}}
-\put(3011,250){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(3540,-121){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(2821,-769){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(2561,-1295){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}addr}%
-}}}}
-\put(3031 ,-1295){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(2043,-732){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1.000,0.627,0.251}fr}%
-}}}}
-\end{picture}%
diff --git a/src/latex/figs/litmus_lb_lrsc.pdf b/src/latex/figs/litmus_lb_lrsc.pdf
deleted file mode 100644
index 3aeafed..0000000
--- a/src/latex/figs/litmus_lb_lrsc.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/litmus_lb_lrsc.pdf_t b/src/latex/figs/litmus_lb_lrsc.pdf_t
deleted file mode 100644
index 2cb417e..0000000
--- a/src/latex/figs/litmus_lb_lrsc.pdf_t
+++ /dev/null
@@ -1,50 +0,0 @@
-\begin{picture}(0,0)%
-\includegraphics{figs/litmus_lb_lrsc.pdf}%
-\end{picture}%
-%
-% Generated by graphviz version 2.38.0 (20140413.2041)
-% Title: G
-% Pages: 1
-%
-\setlength{\unitlength}{3947sp}%
-%
-\begingroup\makeatletter\ifx\SetFigFont\undefined%
-\gdef\SetFigFont#1#2#3#4#5{%
- \reset@font\fontsize{#1}{#2pt}%
- \fontfamily{#3}\fontseries{#4}\fontshape{#5}%
- \selectfont}%
-\fi\endgroup%
-\begin{picture}(3104,2794)(-11,-1943)
-% eiid0
-\put(481,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}a: Rx=0}%
-}}}}
-% eiid1
-\put(481,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}b: Rz*=0}%
-}}}}
-% eiid2
-\put(481,-1021){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}c: Wz*=0}%
-}}}}
-% eiid3
-\put(481,-1831){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}d: Wy=0}%
-}}}}
-% eiid4
-\put(2641,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}e: Ry=0}%
-}}}}
-% eiid5
-\put(2641,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}f: Wx=0}%
-}}}}
-\put(261,281){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}po}%
-}}}}
-\put(646,40){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}data ppo}%
-}}}}
-\put(85,-529){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510} ppo}%
-}}}}
-\put(85,-1339){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}data ppo}%
-}}}}
-\put(1418,-538){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(2680,281){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}data\,ppo}%
-}}}}
-\put(1400,350){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\end{picture}%
diff --git a/src/latex/figs/litmus_mp_fenceww_fri_rfi_addr.pdf b/src/latex/figs/litmus_mp_fenceww_fri_rfi_addr.pdf
deleted file mode 100644
index 37c676d..0000000
--- a/src/latex/figs/litmus_mp_fenceww_fri_rfi_addr.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/litmus_mp_fenceww_fri_rfi_addr.pdf_t b/src/latex/figs/litmus_mp_fenceww_fri_rfi_addr.pdf_t
deleted file mode 100644
index 6df2213..0000000
--- a/src/latex/figs/litmus_mp_fenceww_fri_rfi_addr.pdf_t
+++ /dev/null
@@ -1,56 +0,0 @@
-\begin{picture}(0,0)%
-\includegraphics{figs/litmus_mp_fenceww_fri_rfi_addr.pdf}%
-\end{picture}%
-%
-% Generated by graphviz version 2.38.0 (20140413.2041)
-% Title: G
-% Pages: 1
-%
-\setlength{\unitlength}{3947sp}%
-%
-\begingroup\makeatletter\ifx\SetFigFont\undefined%
-\gdef\SetFigFont#1#2#3#4#5{%
- \reset@font\fontsize{#1}{#2pt}%
- \fontfamily{#3}\fontseries{#4}\fontshape{#5}%
- \selectfont}%
-\fi\endgroup%
-\begin{picture}(3727,2632)(-14,-1781)
-% eiid0
-\put(821,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}a: Wx=1}%
-}}}}
-% eiid1
-\put(821,-157){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}c: Wy=1}%
-}}}}
-% eiid2
-\put(2981,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}d: Ry=1}%
-}}}}
-% eiid3
-\put(2981,-157){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}e: Wy=2}%
-}}}}
-% eiid4
-\put(2981,-913){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}f: Ry=2}%
-}}}}
-% eiid5
-\put(2981,-1669){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}i: Rx=0}%
-}}}}
-\put( 1,349){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.000,0.392,0.000}fence}%
-}}}}
-\put(461,349){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(1742,331){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(1686,-47){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,1}co}%
-}}}}
-\put(2461,280){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1.000,0.627,0.251}fr}%
-}}}}
-\put(2621,280){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(2770,-550){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(3011,-1163){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}addr}%
-}}}}
-\put(3411,-1163){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(1741,-550){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1.000,0.627,0.251}fr}%
-}}}}
-\end{picture}%
diff --git a/src/latex/figs/litmus_ppoca.pdf b/src/latex/figs/litmus_ppoca.pdf
deleted file mode 100644
index 7aadd1f..0000000
--- a/src/latex/figs/litmus_ppoca.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/litmus_ppoca.pdf_t b/src/latex/figs/litmus_ppoca.pdf_t
deleted file mode 100644
index 9e6a06d..0000000
--- a/src/latex/figs/litmus_ppoca.pdf_t
+++ /dev/null
@@ -1,58 +0,0 @@
-\begin{picture}(0,0)%
-\includegraphics{figs/litmus_ppoca.pdf}%
-\end{picture}%
-%
-% Generated by graphviz version 2.36.0 (20140111.2315)
-% Title: G
-% Pages: 1
-%
-\setlength{\unitlength}{3947sp}%
-%
-\begingroup\makeatletter\ifx\SetFigFont\undefined%
-\gdef\SetFigFont#1#2#3#4#5{%
- \reset@font\fontsize{#1}{#2pt}%
- \fontfamily{#3}\fontseries{#4}\fontshape{#5}%
- \selectfont}%
-\fi\endgroup%
-\begin{picture}(3886,2794)(-14,-1943)
-% eiid0
-\put(821,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}a: Wx=1}%
-}}}}
-% eiid1
-\put(821,-1021){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}c: Wy=1}%
-}}}}
-% eiid2
-\put(2981,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}d: Ry=1}%
-}}}}
-% eiid3
-\put(2981,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}e: Wz=1}%
-}}}}
-% eiid4
-\put(2981,-1021){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}f: Rz=1}%
-}}}}
-% eiid5
-\put(2981,-1831){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}g: Rx=0}%
-}}}}
-\put(0,-625){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.000,0.392,0.000}fence}%
-}}}}
-\put(441,-625){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(1739,-122){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(2641,225){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ctrl}%
-}}}}
-\put(3020,225){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(3520,-1000){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ctrl}%
-}}}}
-\put(3100,-544){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ctrl}%
-}}}}
-\put(2800,-600){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(2561,-1295){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}addr}%
-}}}}
-\put(3020,-1295){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(1743,-782){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1.000,0.627,0.251}fr}%
-}}}}
-\end{picture}%
diff --git a/src/latex/figs/litmus_rsw.pdf b/src/latex/figs/litmus_rsw.pdf
deleted file mode 100644
index 4c35d73..0000000
--- a/src/latex/figs/litmus_rsw.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/litmus_rsw.pdf_t b/src/latex/figs/litmus_rsw.pdf_t
deleted file mode 100644
index 2393619..0000000
--- a/src/latex/figs/litmus_rsw.pdf_t
+++ /dev/null
@@ -1,61 +0,0 @@
-\begin{picture}(0,0)%
-\includegraphics{figs/litmus_rsw.pdf}%
-\end{picture}%
-%
-% Generated by graphviz version 2.38.0 (20140413.2041)
-% Title: G
-% Pages: 1
-%
-\setlength{\unitlength}{3947sp}%
-%
-\begingroup\makeatletter\ifx\SetFigFont\undefined%
-\gdef\SetFigFont#1#2#3#4#5{%
- \reset@font\fontsize{#1}{#2pt}%
- \fontfamily{#3}\fontseries{#4}\fontshape{#5}%
- \selectfont}%
-\fi\endgroup%
-\begin{picture}(4187,2308)(-14,-1457)
-% eiid0
-\put(821,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}a: Wx=1}%
-}}}}
-% eiid1
-\put(821,-49){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}c: Wy=1}%
-}}}}
-% eiid2
-\put(2261,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}d: Ry=1}%
-}}}}
-% eiid3
-\put(2261,-49){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}g: Rz=$v$}%
-}}}}
-% eiid4
-\put(2261,-697){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}h: Rz=$v$}%
-}}}}
-% eiid5
-\put(2261,-1345){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}k: Rx=0}%
-}}}}
-% eiid6
-\put(3701,-373){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0} Wz=$v$}%
-}}}}
-\put( 1,173){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.000,0.392,0.000}fence}%
-}}}}
-\put(501,173){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(1376,363){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(1541,173){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}addr}%
-}}}}
-\put(1941,173){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(2041,-279){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}po}%
-}}}}
-\put(1541,-1123){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}addr}%
-}}}}
-\put(1941,-1123){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(1347,-294){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1.000,0.627,0.251}fr}%
-}}}}
-\put(2844,-126){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(2844,-440){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\end{picture}%
diff --git a/src/latex/figs/litmus_sample.pdf b/src/latex/figs/litmus_sample.pdf
deleted file mode 100644
index a12a6ec..0000000
--- a/src/latex/figs/litmus_sample.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/litmus_sample.pdf_t b/src/latex/figs/litmus_sample.pdf_t
deleted file mode 100644
index bcf40a2..0000000
--- a/src/latex/figs/litmus_sample.pdf_t
+++ /dev/null
@@ -1,52 +0,0 @@
-\begin{picture}(0,0)%
-\includegraphics{figs/litmus_sample.pdf}%
-\end{picture}%
-%
-% Generated by graphviz version 2.38.0 (20140413.2041)
-% Title: G
-% Pages: 1
-%
-\setlength{\unitlength}{3947sp}%
-%
-\begingroup\makeatletter\ifx\SetFigFont\undefined%
-\gdef\SetFigFont#1#2#3#4#5{%
- \reset@font\fontsize{#1}{#2pt}%
- \fontfamily{#3}\fontseries{#4}\fontshape{#5}%
- \selectfont}%
-\fi\endgroup%
-\begin{picture}(2410,2794)(-14,-1943)
-% eiid0
-\put(720,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}a: Wx=1}%
-}}}}
-% eiid1
-\put(720,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}b: Wx=2}%
-}}}}
-% eiid2
-\put(720,-1021){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}c: Rx=1}%
-}}}}
-% eiid3
-\put(720,-1831){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}d: Wx=3}%
-}}}}
-% eiid4
-\put(1944, -8){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}e: Wx=4}%
-}}}}
-% eiid5
-\put(1944,-818){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}f: Wx=5}%
-}}}}
-\put(837,272){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,1}co}%
-}}}}
-\put(1251,-121){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(382,282){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,1}co}%
-}}}}
-\put(1217,-931){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,1}co}%
-}}}}
-\put( 1,-121){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1.000,0.627,0.251}fr}%
-}}}}
-\put(560,-763){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1.000,0.627,0.251}fr}%
-}}}}
-\put(560,-1339){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1.000,0.627,0.251}fr}%
-}}}}
-\put(1724,-566){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,1}co}%
-}}}}
-\end{picture}%
diff --git a/src/latex/figs/litmus_sb_fwd.pdf b/src/latex/figs/litmus_sb_fwd.pdf
deleted file mode 100644
index 432dd9a..0000000
--- a/src/latex/figs/litmus_sb_fwd.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/litmus_sb_fwd.pdf_t b/src/latex/figs/litmus_sb_fwd.pdf_t
deleted file mode 100644
index 2eb0d6e..0000000
--- a/src/latex/figs/litmus_sb_fwd.pdf_t
+++ /dev/null
@@ -1,52 +0,0 @@
-\begin{picture}(0,0)%
-\includegraphics{figs/litmus_sb_fwd.pdf}%
-\end{picture}%
-%
-% Generated by graphviz version 2.38.0 (20140413.2041)
-% Title: G
-% Pages: 1
-%
-\setlength{\unitlength}{3947sp}%
-%
-\begingroup\makeatletter\ifx\SetFigFont\undefined%
-\gdef\SetFigFont#1#2#3#4#5{%
- \reset@font\fontsize{#1}{#2pt}%
- \fontfamily{#3}\fontseries{#4}\fontshape{#5}%
- \selectfont}%
-\fi\endgroup%
-\begin{picture}(3447,1984)(-14,-1133)
-% eiid0
-\put(821,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}a: Wx=1}%
-}}}}
-% eiid1
-\put(821,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}b: Rx=1}%
-}}}}
-% eiid2
-\put(821,-1021){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}d: Ry=0}%
-}}}}
-% eiid3
-\put(2981,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}e: Wy=1}%
-}}}}
-% eiid4
-\put(2981,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}f: Ry=1}%
-}}}}
-% eiid5
-\put(2981,-1021){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}h: Rx=0}%
-}}}}
-\put(661,281){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put( 1,-485){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.000,0.392,0.000}fence}%
-}}}}
-\put(450,-485){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(1539,-122){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1.000,0.627,0.251}fr}%
-}}}}
-\put(3021,281){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(3061,-485){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.000,0.392,0.000}fence}%
-}}}}
-\put(3511,-485){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(2103,-122){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1.000,0.627,0.251}fr}%
-}}}}
-\end{picture}%
diff --git a/src/latex/figs/litmus_subsumption.pdf b/src/latex/figs/litmus_subsumption.pdf
deleted file mode 100644
index 89c85ef..0000000
--- a/src/latex/figs/litmus_subsumption.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/litmus_subsumption.pdf_t b/src/latex/figs/litmus_subsumption.pdf_t
deleted file mode 100644
index 12a543d..0000000
--- a/src/latex/figs/litmus_subsumption.pdf_t
+++ /dev/null
@@ -1,49 +0,0 @@
-\begin{picture}(0,0)%
-\includegraphics{figs/litmus_subsumption.pdf}%
-\end{picture}%
-%
-% Generated by graphviz version 2.38.0 (20140413.2041)
-% Title: G
-% Pages: 1
-%
-\setlength{\unitlength}{3947sp}%
-%
-\begingroup\makeatletter\ifx\SetFigFont\undefined%
-\gdef\SetFigFont#1#2#3#4#5{%
- \reset@font\fontsize{#1}{#2pt}%
- \fontfamily{#3}\fontseries{#4}\fontshape{#5}%
- \selectfont}%
-\fi\endgroup%
-\begin{picture}(3447,1984)(-14,-1133)
-% eiid0
-\put(821,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}a: Wx=3}%
-}}}}
-% eiid1
-\put(821,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}c: Wy=1}%
-}}}}
-% eiid3
-\put(2981,-211){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}e: Wx=1}%
-}}}}
-% eiid2
-\put(2981,599){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}d: Ry=1}%
-}}}}
-% eiid4
-\put(2981,-1021){\makebox(0,0)[b]{\smash{{\SetFigFont{8}{9.6}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}f: Wx=2}%
-}}}}
-\put( -40, 190){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.000,0.392,0.000}fence}%
-}}}}
-\put(450, 190){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(1673,380){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,1}co}%
-}}}}
-\put(1742, 44){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{1,0,0}rf}%
-}}}}
-\put(2261, 230){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}data}%
-}}}}
-\put(2650, 230){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\put(2420,-485){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,1}co}%
-}}}}
-\put(2630,-485){\makebox(0,0)[lb]{\smash{{\SetFigFont{11}{13.2}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0.294,0.000,0.510}ppo}%
-}}}}
-\end{picture}%
diff --git a/src/latex/figs/privimps.pdf b/src/latex/figs/privimps.pdf
deleted file mode 100644
index 9ed591b..0000000
--- a/src/latex/figs/privimps.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/figs/virtimps.pdf b/src/latex/figs/virtimps.pdf
deleted file mode 100644
index 7b98872..0000000
--- a/src/latex/figs/virtimps.pdf
+++ /dev/null
Binary files differ
diff --git a/src/latex/hypervisor.tex b/src/latex/hypervisor.tex
deleted file mode 100644
index 4e3fbe5..0000000
--- a/src/latex/hypervisor.tex
+++ /dev/null
@@ -1,3590 +0,0 @@
-\chapter{Hypervisor Extension, Version 1.0}
-\label{hypervisor}
-
-This chapter describes the RISC-V hypervisor extension, which virtualizes the
-supervisor-level architecture to support the efficient hosting of guest
-operating systems atop a type-1 or type-2 hypervisor.
-The hypervisor extension changes supervisor mode into
-{\em hypervisor-extended supervisor mode} (HS-mode, or {\em hypervisor
-mode} for short), where a hypervisor or a hosting-capable operating system
-runs. The hypervisor extension also adds another stage of address translation,
-from {\em guest physical addresses} to supervisor physical addresses,
-to virtualize the
-memory and memory-mapped I/O subsystems for a guest operating system. HS-mode
-acts the same as S-mode, but with additional instructions and CSRs that control
-the new stage of address translation and support hosting a guest OS in virtual
-S-mode (VS-mode).
-Regular S-mode operating systems can execute without modification either in
-HS-mode or as VS-mode guests.
-
-In HS-mode, an OS or hypervisor interacts with the machine through the same
-SBI as an OS normally does from S-mode. An HS-mode hypervisor is expected to
-implement the SBI for its VS-mode guest.
-
-The hypervisor extension depends on an ``I'' base integer ISA with
-32 {\tt x} registers (RV32I or RV64I), not RV32E, which has only
-16 {\tt x} registers.
-CSR {\tt mtval} must not be read-only zero, and
-standard page-based address translation must be supported, either
-Sv32 for RV32, or a minimum of Sv39 for RV64.
-
-The hypervisor extension is enabled by setting bit 7 in the {\tt misa} CSR,
-which corresponds to the letter H.
-RISC-V harts that implement the hypervisor extension are encouraged
-not to hardwire {\tt misa}[7], so that the extension may be disabled.
-
-\begin{commentary}
-The baseline privileged architecture is designed to simplify the use of classic
-virtualization techniques, where a guest OS is run at user-level, as
-the few privileged instructions can be easily detected and trapped.
-The hypervisor extension improves virtualization performance by
-reducing the frequency of these traps.
-
-The hypervisor extension has been designed to be efficiently
-emulable on platforms that do not implement the extension, by running
-the hypervisor in S-mode and trapping into M-mode for hypervisor CSR accesses
-and to maintain shadow page tables. The majority of CSR accesses for
-type-2 hypervisors are valid S-mode accesses so need not be trapped.
-Hypervisors can support nested virtualization analogously.
-\end{commentary}
-
-\section{Privilege Modes}
-
-The current {\em virtualization mode}, denoted V, indicates whether the hart
-is currently executing in a guest.
-When V=1, the hart is either in virtual S-mode (VS-mode), or in virtual U-mode
-(VU-mode) atop a guest OS running in VS-mode.
-When V=0, the hart is either in M-mode, in HS-mode, or in U-mode atop an OS
-running in HS-mode.
-The virtualization mode also indicates whether two-stage address translation
-is active (V=1) or inactive (V=0). Table~\ref{tab:HPrivModes} lists the
-possible privilege modes of a RISC-V hart with the hypervisor extension.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|c|c||l|l|l|}
- \hline
- Virtualization & Nominal & \multirow{2}{*}{Abbreviation} & \multirow{2}{*}{Name} & Two-Stage \\
- Mode (V) & Privilege & & & Translation \\ \hline
- 0 & U & U-mode & User mode & Off \\
- 0 & S & HS-mode & Hypervisor-extended supervisor mode & Off \\
- 0 & M & M-mode & Machine mode & Off \\
- \hline
- 1 & U & VU-mode & Virtual user mode & On \\
- 1 & S & VS-mode & Virtual supervisor mode & On \\
- \hline
- \end{tabular}
-\end{center}
-\caption{Privilege modes with the hypervisor extension.}
-\label{tab:HPrivModes}
-\end{table*}
-
-For privilege modes U and VU, the \textit{nominal privilege mode} is~U,
-and for privilege modes HS and VS, the nominal privilege mode is~S.
-
-HS-mode is more privileged
-than VS-mode, and VS-mode is more privileged than VU-mode.
-VS-mode interrupts are globally disabled when executing in U-mode.
-
-\begin{commentary}
-This description does not consider the possibility of U-mode or VU-mode interrupts and will be revised if an extension for user-level interrupts is adopted.
-\end{commentary}
-
-\section{Hypervisor and Virtual Supervisor CSRs}
-
-An OS or hypervisor running in HS-mode uses the supervisor CSRs to interact with the exception,
-interrupt, and address-translation subsystems.
-Additional CSRs are provided to HS-mode, but not to VS-mode, to manage
-two-stage address translation and to control the behavior of a VS-mode guest:
-{\tt hstatus}, {\tt hedeleg}, {\tt hideleg}, {\tt hvip}, {\tt hip}, {\tt hie},
-{\tt hgeip}, {\tt hgeie}, {\tt henvcfg}, {\tt henvcfgh},
-{\tt hcounteren}, {\tt htimedelta}, {\tt htimedeltah}, {\tt htval},
-{\tt htinst}, and {\tt hgatp}.
-
-Furthermore, several {\em virtual supervisor} CSRs (VS CSRs) are replicas
-of the normal supervisor CSRs.
-For example, {\tt vsstatus} is the VS CSR that duplicates the usual
-{\tt sstatus} CSR.
-
-When V=1, the VS CSRs substitute for the corresponding supervisor CSRs,
-taking over all functions of the usual supervisor CSRs except as specified
-otherwise.
-Instructions that normally read or modify a supervisor CSR shall instead
-access the corresponding VS CSR.
-When V=1, an attempt to read or write a VS CSR directly by its own
-separate CSR address causes a virtual instruction exception.
-(Attempts from U-mode cause an illegal instruction exception as usual.)
-The VS CSRs can be accessed as themselves only from M-mode or HS-mode.
-
-While V=1, the normal HS-level supervisor CSRs that are replaced by
-VS CSRs retain their values but do
-not affect the behavior of the machine unless specifically documented to
-do so.
-Conversely, when V=0, the VS CSRs do not ordinarily affect the behavior of
-the machine other than being readable and writable by CSR instructions.
-
-Some standard supervisor CSRs ({\tt senvcfg},
-{\tt scounteren}, and {\tt scontext},
-possibly others) have no matching VS CSR.
-These supervisor CSRs continue to have their usual function and
-accessibility even when V=1, except with VS-mode and VU-mode substituting for
-HS-mode and U-mode.
-Hypervisor software is expected to manually swap the contents of these
-registers as needed.
-
-\begin{commentary}
-Matching VS CSRs exist only for the supervisor CSRs that must be
-duplicated, which are mainly those that get automatically written by
-traps or that impact instruction execution immediately after trap entry
-and/or right before SRET, when software alone is unable to swap a CSR at
-exactly the right moment.
-Currently, most supervisor CSRs fall into this category, but future ones
-might not.
-\end{commentary}
-
-In this chapter, we use the term {\em HSXLEN} to refer to the effective XLEN
-when executing in HS-mode, and {\em VSXLEN} to refer to the effective
-XLEN when executing in VS-mode.
-
-\subsection{Hypervisor Status Register ({\tt hstatus})}
-
-The {\tt hstatus} register is an HSXLEN-bit read/write register
-formatted as shown in Figure~\ref{hstatusreg-rv32} when HSXLEN=32 and
-Figure~\ref{hstatusreg} when HSXLEN=64.
-The {\tt hstatus}
-register provides facilities analogous to the {\tt mstatus} register
-for tracking and controlling the exception behavior of a VS-mode guest.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\scalebox{0.95}{
-\begin{tabular}{YcccWYccWcccccF}
-\\
-\instbitrange{31}{23} &
-\instbit{22} &
-\instbit{21} &
-\instbit{20} &
-\instbitrange{19}{18} &
-\instbitrange{17}{12} &
-\instbitrange{11}{10} &
-\instbit{9} &
-\instbit{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbitrange{4}{0} \\
-\hline
-\multicolumn{1}{|c|}{\wpri} &
-\multicolumn{1}{c|}{VTSR} &
-\multicolumn{1}{c|}{VTW} &
-\multicolumn{1}{c|}{VTVM} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{VGEIN[5:0]} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{HU} &
-\multicolumn{1}{c|}{SPVP} &
-\multicolumn{1}{c|}{SPV} &
-\multicolumn{1}{c|}{GVA} &
-\multicolumn{1}{c|}{VSBE} &
-\multicolumn{1}{c|}{\wpri} \\
-\hline
-9 & 1 & 1 & 1 & 2 & 6 & 2 & 1 & 1 & 1 & 1 & 1 & 5 \\
-\end{tabular}}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor status register ({\tt hstatus}) when HSXLEN=32.}
-\label{hstatusreg-rv32}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{KFScccc}
-\\
-\instbitrange{HSXLEN-1}{34} &
-\instbitrange{33}{32} &
-\instbitrange{31}{23} &
-\instbit{22} &
-\instbit{21} &
-\instbit{20} &
- \\
-\hline
-\multicolumn{1}{|c|}{\wpri} &
-\multicolumn{1}{c|}{VSXL[1:0]} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{VTSR} &
-\multicolumn{1}{c|}{VTW} &
-\multicolumn{1}{c|}{VTVM} &
- \\
-\hline
-HSXLEN-34 & 2 & 9 & 1 & 1 & 1 & \\
-\end{tabular}
-\begin{tabular}{cWRWcccccY}
-\\
-&
-\instbitrange{19}{18} &
-\instbitrange{17}{12} &
-\instbitrange{11}{10} &
-\instbit{9} &
-\instbit{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbitrange{4}{0} \\
-\hline
- &
-\multicolumn{1}{|c|}{\wpri} &
-\multicolumn{1}{c|}{VGEIN[5:0]} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{HU} &
-\multicolumn{1}{c|}{SPVP} &
-\multicolumn{1}{c|}{SPV} &
-\multicolumn{1}{c|}{GVA} &
-\multicolumn{1}{c|}{VSBE} &
-\multicolumn{1}{c|}{\wpri} \\
-\hline
- & 2 & 6 & 2 & 1 & 1 & 1 & 1 & 1 & 5 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor status register ({\tt hstatus}) when HSXLEN=64.}
-\label{hstatusreg}
-\end{figure*}
-
-The VSXL field controls the effective XLEN for VS-mode (known as VSXLEN),
-which may differ from the XLEN for HS-mode (HSXLEN).
-When HSXLEN=32, the VSXL field does not exist, and VSXLEN=32.
-When HSXLEN=64, VSXL is a \warl\ field that is encoded the same as the
-MXL field of {\tt misa}, shown in Table~\ref{misabase} on
-page~\pageref{misabase}.
-In particular, an implementation may make VSXL be a read-only field whose
-value always ensures that VSXLEN=HSXLEN.
-
-If HSXLEN is changed from 32 to a wider width, and if field VSXL is not
-restricted to a single value, it gets the value corresponding to the
-widest supported width not wider than the new HSXLEN.
-
-The {\tt hstatus} fields VTSR, VTW, and VTVM are defined analogously to the
-{\tt mstatus} fields TSR, TW, and TVM, but affect execution only in VS-mode,
-and cause virtual instruction exceptions instead of illegal instruction
-exceptions.
-When VTSR=1, an attempt in VS-mode to execute SRET raises a virtual
-instruction exception.
-When VTW=1 (and assuming {\tt mstatus}.TW=0), an attempt in VS-mode to
-execute WFI raises a virtual instruction exception if the WFI does not
-complete within an implementation-specific, bounded time limit.
-An implementation may have WFI always raise a virtual instruction exception in
-VS-mode when VTW=1 (and {\tt mstatus}.TW=0), even if there are pending
-globally-disabled interrupts when the instruction is executed.
-When VTVM=1, an attempt in VS-mode to execute SFENCE.VMA or SINVAL.VMA or to
-access CSR {\tt satp} raises a virtual instruction exception.
-
-The VGEIN (Virtual Guest External Interrupt Number) field selects a guest
-external interrupt source for VS-level external interrupts.
-VGEIN is a \wlrl\ field that must be able to hold values between zero
-and the maximum guest external interrupt number (known as GEILEN),
-inclusive.
-When VGEIN=0, no guest external interrupt source is selected for VS-level
-external interrupts.
-GEILEN may be zero, in which case VGEIN may be read-only zero.
-Guest external interrupts are explained in
-Section~\ref{sec:hgeinterruptregs}, and the use of VGEIN is covered
-further in Section~\ref{sec:hinterruptregs}.
-
-Field HU (Hypervisor in U-mode) controls whether the virtual-machine
-load/store instructions, HLV, HLVX, and HSV, can also be used in U-mode.
-When HU=1, these instructions can be executed in U-mode the same as in
-HS-mode.
-When HU=0, all hypervisor instructions cause an illegal instruction trap
-in U-mode.
-
-\begin{commentary}
-The HU bit allows a portion of a hypervisor to be run in U-mode for
-greater protection against software bugs, while still retaining access to
-a virtual machine's memory.
-\end{commentary}
-
-The SPV bit (Supervisor Previous Virtualization mode) is written by the implementation
-whenever a trap is taken into HS-mode.
-Just as the SPP bit in {\tt sstatus} is set to the (nominal) privilege
-mode at the time of the trap, the SPV bit in {\tt hstatus} is set to the value of the virtualization
-mode V at the time of the trap. When an SRET instruction is executed while V=0,
-V is set to SPV.
-
-When V=1 and a trap is taken into HS-mode, bit SPVP (Supervisor Previous
-Virtual Privilege) is set to the nominal privilege mode at the time of the trap,
-the same as {\tt sstatus}.SPP.
-But if V=0 before a trap, SPVP is left unchanged on trap entry.
-SPVP controls the effective privilege of explicit memory accesses made by
-the virtual-machine load/store instructions, HLV, HLVX, and HSV.
-
-\begin{commentary}
-Without SPVP, if instructions HLV, HLVX, and HSV looked instead to
-{\tt sstatus}.SPP for the effective privilege of their memory accesses,
-then, even with HU=1, U-mode could not access virtual machine memory at
-VS-level, because entering U-mode using SRET always leaves SPP=0.
-Unlike SPP, field SPVP is untouched by transitions back-and-forth between
-HS-mode and U-mode.
-\end{commentary}
-
-Field GVA (Guest Virtual Address) is written by the implementation
-whenever a trap is taken into HS-mode.
-For any trap (breakpoint, address misaligned,
-access fault, page fault, or guest-page fault) that writes
-a guest virtual address to {\tt stval}, GVA is set to~1.
-For any other trap into HS-mode, GVA is set to~0.
-
-\begin{commentary}
-For breakpoint and memory access traps
-that write a nonzero value to {\tt stval},
-GVA is redundant with field SPV (the two bits are set
-the same) except when the explicit memory access of an HLV, HLVX, or HSV
-instruction causes a fault.
-In that case, SPV=0 but GVA=1.
-\end{commentary}
-
-The VSBE bit is a \warl\ field that controls the endianness of explicit
-memory accesses made from VS-mode.
-If VSBE=0, explicit load and store memory accesses made from VS-mode are
-little-endian, and if VSBE=1, they are big-endian.
-VSBE also controls the endianness of all implicit accesses to VS-level
-memory management data structures, such as page tables.
-An implementation may make VSBE a read-only field that always specifies
-the same endianness as HS-mode.
-
-\subsection{Hypervisor Trap Delegation Registers ({\tt hedeleg} and {\tt hideleg})}
-
-Registers {\tt hedeleg} and {\tt hideleg} are HSXLEN-bit read/write
-registers, formatted as shown in Figures \ref{hedelegreg} and
-\ref{hidelegreg} respectively.
-By default, all traps at any privilege level are handled in M-mode, though
-M-mode usually uses the {\tt medeleg} and {\tt mideleg} CSRs to delegate
-some traps to HS-mode. The {\tt hedeleg} and {\tt hideleg} CSRs allow these
-traps to be further delegated to a VS-mode guest; their layout is the same
-as that of {\tt medeleg} and {\tt mideleg}.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{HSXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Synchronous Exceptions (\warl)} \\
-\hline
-HSXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor exception delegation register ({\tt hedeleg}).}
-\label{hedelegreg}
-\end{figure}
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{HSXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Interrupts (\warl)} \\
-\hline
-HSXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor interrupt delegation register ({\tt hideleg}).}
-\label{hidelegreg}
-\end{figure}
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|r|l|l|}
-\hline
-Bit & Attribute & Corresponding Exception \\
-\hline
- 0 & (See text) & Instruction address misaligned \\
- 1 & Writable & Instruction access fault \\
- 2 & Writable & Illegal instruction \\
- 3 & Writable & Breakpoint \\
- 4 & Writable & Load address misaligned \\
- 5 & Writable & Load access fault \\
- 6 & Writable & Store/AMO address misaligned \\
- 7 & Writable & Store/AMO access fault \\
- 8 & Writable & Environment call from U-mode or VU-mode \\
- 9 & Read-only 0 & Environment call from HS-mode \\
-10 & Read-only 0 & Environment call from VS-mode \\
-11 & Read-only 0 & Environment call from M-mode \\
-12 & Writable & Instruction page fault \\
-13 & Writable & Load page fault \\
-15 & Writable & Store/AMO page fault \\
-20 & Read-only 0 & Instruction guest-page fault \\
-21 & Read-only 0 & Load guest-page fault \\
-22 & Read-only 0 & Virtual instruction \\
-23 & Read-only 0 & Store/AMO guest-page fault \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Bits of {\tt hedeleg} that must be writable or must be read-only
-zero.}
-\label{tab:hedeleg-bits}
-\end{table*}
-
-A synchronous trap that has been delegated to HS-mode (using
-{\tt medeleg}) is further delegated to VS-mode if V=1 before the trap and
-the corresponding {\tt hedeleg} bit is set.
-Each bit of {\tt hedeleg} shall be either writable or read-only zero.
-Many bits of {\tt hedeleg} are required specifically to be writable or
-read-only zero, as enumerated in Table~\ref{tab:hedeleg-bits}.
-Bit~0, corresponding to instruction address misaligned exceptions, must
-be writable if IALIGN=32.
-
-\begin{commentary}
-Requiring that certain bits of {\tt hedeleg} be writable reduces some of
-the burden on a hypervisor to handle variations of implementation.
-\end{commentary}
-
-An interrupt that has been delegated to HS-mode (using {\tt mideleg}) is
-further delegated to VS-mode if the corresponding {\tt hideleg} bit is
-set.
-Among bits 15:0 of {\tt hideleg}, bits 10, 6, and 2 (corresponding
-to the standard VS-level interrupts) are writable, and bits 12, 9, 5,
-and 1 (corresponding to the standard S-level interrupts) are read-only
-zeros.
-
-When a virtual supervisor external interrupt (code 10) is delegated to
-VS-mode, it is automatically translated by the machine into a supervisor
-external interrupt (code 9) for VS-mode, including the value written to
-{\tt vscause} on an interrupt trap.
-Likewise, a virtual supervisor timer interrupt (6) is translated into a
-supervisor timer interrupt (5) for VS-mode, and a virtual supervisor
-software interrupt (2) is translated into a supervisor software interrupt
-(1) for VS-mode.
-Similar translations may or may not be done for platform or custom
-interrupt causes (codes 16 and above).
-
-\subsection{Hypervisor Interrupt Registers ({\tt hvip}, {\tt hip}, and {\tt hie})}
-\label{sec:hinterruptregs}
-
-Register {\tt hvip} is an HSXLEN-bit read/write register that a
-hypervisor can write to indicate virtual interrupts intended for VS-mode.
-Bits of {\tt hvip} that are not writable are read-only zeros.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{HSXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Virtual Interrupts (\warl)} \\
-\hline
-HSXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor virtual-interrupt-pending register ({\tt hvip}).}
-\label{hvipreg}
-\end{figure}
-
-The standard portion (bits 15:0) of {\tt hvip} is formatted as shown in
-Figure~\ref{hvipreg-standard}.
-Bits VSEIP, VSTIP, and VSSIP of {\tt hvip} are writable.
-Setting VSEIP=1 in {\tt hvip} asserts a VS-level external interrupt;
-setting VSTIP asserts a VS-level timer interrupt; and setting VSSIP
-asserts a VS-level software interrupt.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{RcFcFcW}
-\instbitrange{15}{11} &
-\instbit{10} &
-\instbitrange{9}{7} &
-\instbit{6} &
-\instbitrange{5}{3} &
-\instbit{2} &
-\instbitrange{1}{0} \\
-\hline
-\multicolumn{1}{|c|}{0} &
-\multicolumn{1}{c|}{VSEIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{VSTIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{VSSIP} &
-\multicolumn{1}{c|}{0} \\
-\hline
-5 & 1 & 3 & 1 & 3 & 1 & 2 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Standard portion (bits 15:0) of {\tt hvip}.}
-\label{hvipreg-standard}
-\end{figure*}
-
-Registers {\tt hip} and {\tt hie} are HSXLEN-bit read/write registers
-that supplement HS-level's {\tt sip} and {\tt sie} respectively.
-The {\tt hip} register indicates pending VS-level and hypervisor-specific
-interrupts, while {\tt hie} contains enable bits for the same interrupts.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{HSXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Interrupts (\warl)} \\
-\hline
-HSXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor interrupt-pending register ({\tt hip}).}
-\label{hipreg}
-\end{figure}
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{HSXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Interrupts (\warl)} \\
-\hline
-HSXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor interrupt-enable register ({\tt hie}).}
-\label{hiereg}
-\end{figure}
-
-For each writable bit in {\tt sie}, the corresponding bit shall be
-read-only zero in both {\tt hip} and {\tt hie}.
-Hence, the nonzero bits in {\tt sie} and {\tt hie} are always mutually
-exclusive, and likewise for {\tt sip} and {\tt hip}.
-
-\begin{commentary}
-The active bits of {\tt hip} and {\tt hie} cannot be placed in HS-level's
-{\tt sip} and {\tt sie} because doing so would make it impossible for
-software to emulate the hypervisor extension on platforms that do not
-implement it in hardware.
-\end{commentary}
-
-An interrupt~\textit{i} will trap to HS-mode whenever all of the
-following are true:
-(a)~either the current operating mode is HS-mode and the SIE bit in the
-{\tt sstatus} register is set, or the current operating mode has less
-privilege than HS-mode;
-(b)~bit~\textit{i} is set in both {\tt sip} and {\tt sie}, or in both
-{\tt hip} and {\tt hie}; and
-(c)~bit~\textit{i} is not set in {\tt hideleg}.
-
-If bit~\textit{i} of {\tt sie} is read-only zero, the same bit in
-register {\tt hip} may be writable or may be read-only.
-When bit~\textit{i} in {\tt hip} is writable, a pending interrupt
-\textit{i} can be cleared by writing 0 to this bit.
-If interrupt \textit{i} can become pending in {\tt hip} but
-bit~\textit{i} in {\tt hip} is read-only, then either
-the interrupt can be cleared by clearing bit~\textit{i}
-of {\tt hvip}, or the implementation must provide
-some other mechanism for clearing the pending interrupt (which may
-involve a call to the execution environment).
-
-A bit in {\tt hie} shall be writable if the corresponding interrupt can
-ever become pending in {\tt hip}.
-Bits of {\tt hie} that are not writable shall be read-only zero.
-
-The standard portions (bits 15:0) of registers {\tt hip} and {\tt hie}
-are formatted as shown in Figures \ref{hipreg-standard} and
-\ref{hiereg-standard} respectively.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{FcccFcFcW}
-\instbitrange{15}{13} &
-\instbit{12} &
-\instbit{11} &
-\instbit{10} &
-\instbitrange{9}{7} &
-\instbit{6} &
-\instbitrange{5}{3} &
-\instbit{2} &
-\instbitrange{1}{0} \\
-\hline
-\multicolumn{1}{|c|}{0} &
-\multicolumn{1}{c|}{SGEIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{VSEIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{VSTIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{VSSIP} &
-\multicolumn{1}{c|}{0} \\
-\hline
-3 & 1 & 1 & 1 & 3 & 1 & 3 & 1 & 2 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Standard portion (bits 15:0) of {\tt hip}.}
-\label{hipreg-standard}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{FcccFcFcW}
-\instbitrange{15}{13} &
-\instbit{12} &
-\instbit{11} &
-\instbit{10} &
-\instbitrange{9}{7} &
-\instbit{6} &
-\instbitrange{5}{3} &
-\instbit{2} &
-\instbitrange{1}{0} \\
-\hline
-\multicolumn{1}{|c|}{0} &
-\multicolumn{1}{c|}{SGEIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{VSEIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{VSTIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{VSSIE} &
-\multicolumn{1}{c|}{0} \\
-\hline
-3 & 1 & 1 & 1 & 3 & 1 & 3 & 1 & 2 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Standard portion (bits 15:0) of {\tt hie}.}
-\label{hiereg-standard}
-\end{figure*}
-
-Bits {\tt hip}.SGEIP and {\tt hie}.SGEIE are the interrupt-pending and
-interrupt-enable bits for guest external interrupts at supervisor level
-(HS-level).
-SGEIP is read-only in {\tt hip}, and is 1 if and only if the bitwise
-logical-AND of CSRs {\tt hgeip} and {\tt hgeie} is nonzero in any bit.
-(See Section~\ref{sec:hgeinterruptregs}.)
-
-Bits {\tt hip}.VSEIP and {\tt hie}.VSEIE are the interrupt-pending and
-interrupt-enable bits for VS-level external interrupts.
-VSEIP is read-only in {\tt hip}, and is the logical-OR of these interrupt
-sources:
-\begin{tightlist}
-\item
-bit VSEIP of {\tt hvip};
-\item
-the bit of {\tt hgeip} selected by {\tt hstatus}.VGEIN; and
-\item
-any other platform-specific external interrupt signal directed to
-VS-level.
-\end{tightlist}
-
-Bits {\tt hip}.VSTIP and {\tt hie}.VSTIE are the interrupt-pending and
-interrupt-enable bits for VS-level timer interrupts.
-VSTIP is read-only in {\tt hip}, and is the logical-OR of
-{\tt hvip}.VSTIP and any other platform-specific timer interrupt signal
-directed to VS-level.
-
-Bits {\tt hip}.VSSIP and {\tt hie}.VSSIE are the interrupt-pending and
-interrupt-enable bits for VS-level software interrupts.
-VSSIP in {\tt hip} is an alias (writable) of the same bit in {\tt hvip}.
-
-Multiple simultaneous interrupts destined for HS-mode are handled in the
-following decreasing priority order: SEI, SSI, STI, SGEI, VSEI, VSSI, VSTI.
-
-\subsection{Hypervisor Guest External Interrupt Registers ({\tt hgeip} and {\tt hgeie})}
-\label{sec:hgeinterruptregs}
-
-The {\tt hgeip} register is an HSXLEN-bit read-only register, formatted
-as shown in Figure~\ref{hgeipreg}, that indicates pending guest external
-interrupts for this hart.
-The {\tt hgeie} register is an HSXLEN-bit read/write register, formatted
-as shown in Figure~\ref{hgeiereg}, that contains enable bits for the
-guest external interrupts at this hart.
-Guest external interrupt number \textit{i} corresponds with
-bit~\textit{i} in both {\tt hgeip} and {\tt hgeie}.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}Jc}
-\instbitrange{HSXLEN-1}{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{Guest External Interrupts} &
-\multicolumn{1}{c|}{0} \\
-\hline
-HSXLEN-1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor guest external interrupt-pending register ({\tt hgeip}).}
-\label{hgeipreg}
-\end{figure}
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}Jc}
-\instbitrange{HSXLEN-1}{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{Guest External Interrupts (\warl)} &
-\multicolumn{1}{c|}{0} \\
-\hline
-HSXLEN-1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor guest external interrupt-enable register ({\tt hgeie}).}
-\label{hgeiereg}
-\end{figure}
-
-Guest external interrupts represent interrupts directed to individual
-virtual machines at VS-level.
-If a RISC-V platform supports placing a physical device under the direct
-control of a guest OS with minimal hypervisor intervention (known as
-\emph{pass-through} or \emph{direct assignment} between a virtual machine
-and the physical device), then, in such circumstances, interrupts from the
-device are intended for a specific virtual machine.
-Each bit of {\tt hgeip} summarizes \emph{all} pending interrupts directed
-to one virtual hart, as collected and reported by an interrupt
-controller.
-To distinguish specific pending interrupts from multiple devices,
-software must query the interrupt controller.
-
-\begin{commentary}
-Support for guest external interrupts requires an interrupt controller
-that can collect virtual-machine-directed interrupts separately from
-other interrupts.
-\end{commentary}
-
-The number of bits implemented in {\tt hgeip} and {\tt hgeie} for guest
-external interrupts is \unspecified\ and may be zero.
-This number is known as \textit{GEILEN}.
-The least-significant bits are implemented first, apart from bit~0.
-Hence, if GEILEN is nonzero, bits GEILEN:1 shall be writable in
-{\tt hgeie}, and all other bit positions shall be read-only zeros in
-both {\tt hgeip} and {\tt hgeie}.
-
-\begin{commentary}
-The set of guest external interrupts received and handled at one physical
-hart may differ from those received at other harts.
-Guest external interrupt number~\textit{i} at one physical hart is
-typically expected not to be the same as guest external
-interrupt~\textit{i} at any other hart.
-For any one physical hart, the maximum number of virtual harts that may
-directly receive guest external interrupts is limited by GEILEN.
-The maximum this number can be for any implementation is 31 for RV32 and
-63 for RV64, per physical hart.
-
-A hypervisor is always free to \emph{emulate} devices for any number of
-virtual harts without being limited by GEILEN.
-Only direct pass-through (direct assignment) of interrupts is affected by
-the GEILEN limit, and the limit is on the number of virtual harts
-receiving such interrupts, not the number of distinct interrupts
-received.
-The number of distinct interrupts a single virtual hart may receive is
-determined by the interrupt controller.
-\end{commentary}
-
-Register {\tt hgeie} selects the subset of guest external interrupts that
-cause a supervisor-level (HS-level) guest external interrupt.
-The enable bits in {\tt hgeie} do not affect the VS-level external
-interrupt signal selected from {\tt hgeip} by {\tt hstatus}.VGEIN.
-
-\subsection{%
- Hypervisor Environment Configuration Registers
- ({\tt henvcfg} and {\tt henvcfgh})%
-}
-
-The {\tt henvcfg} CSR is an HSXLEN-bit read/write register,
-formatted for HSXLEN=64 as shown in Figure~\ref{fig:henvcfg},
-that controls certain
-characteristics of the execution environment when virtualization mode
-V=1.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{cc@{}Mcc@{}W@{}Wc}
-\instbit{63} &
-\instbit{62} &
-\instbitrange{61}{8} &
-\instbit{7} &
-\instbit{6} &
-\instbitrange{5}{4} &
-\instbitrange{3}{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{STCE} &
-\multicolumn{1}{c|}{PBMTE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{CBZE} &
-\multicolumn{1}{c|}{CBCFE} &
-\multicolumn{1}{c|}{CBIE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{FIOM} \\
-\hline
-1 & 1 & 54 & 1 & 1 & 2 & 3 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor environment configuration register ({\tt henvcfg}) for HSXLEN=64.}
-\label{fig:henvcfg}
-\end{figure}
-
-If bit FIOM (Fence of I/O implies Memory) is set to one in
-{\tt henvcfg}, FENCE instructions executed when V=1 are modified
-so the requirement to order accesses to device I/O implies also the
-requirement to order main memory accesses.
-Table~\ref{tab:henvcfg-FIOM} details the modified interpretation of
-FENCE instruction bits PI, PO, SI, and SO when FIOM=1 and V=1.
-
-Similarly, when FIOM=1 and V=1,
-if an atomic instruction that accesses a region ordered as device I/O
-has its {\em aq} and/or {\em rl} bit set, then that instruction is ordered
-as though it accesses both device I/O and memory.
-
-\begin{table}[h!]
-\begin{center}
-\begin{tabular}{|c|l|}
-\hline
-Instruction bit & Meaning when set \\
-\hline
-PI & Predecessor device input and memory reads (PR implied) \\
-PO & Predecessor device output and memory writes (PW implied) \\
-\hline
-SI & Successor device input and memory reads (SR implied) \\
-SO & Successor device output and memory writes (SW implied) \\
-\hline
-\end{tabular}
-\end{center}
-\vspace{-0.1in}
-\caption{%
-Modified interpretation of FENCE predecessor and successor sets when
-FIOM=1 and virtualization mode V=1.%
-}
-\label{tab:henvcfg-FIOM}
-\end{table}
-
-The PBMTE bit controls whether the Svpbmt extension is available for use in
-VS-stage address translation.
-When PBMTE=1, Svpbmt is available for VS-stage address translation.
-When PBMTE=0, the implementation behaves as though Svpbmt were not implemented
-for VS-stage address translation.
-If Svpbmt is not implemented, PBMTE is read-only zero.
-
-The definition of the STCE field will be furnished by the
-forthcoming Sstc extension.
-Its allocation within {\tt henvcfg} may change prior to the ratification
-of that extension.
-
-The definition of the CBZE field will be furnished by the
-forthcoming Zicboz extension.
-Its allocation within {\tt henvcfg} may change prior to the ratification
-of that extension.
-
-The definitions of the CBCFE and CBIE fields will be furnished by the
-forthcoming Zicbom extension.
-Their allocations within {\tt henvcfg} may change prior to the ratification
-of that extension.
-
-When HSXLEN=32, {\tt henvcfg} contains the same fields as bits 31:0
-of {\tt henvcfg} when HSXLEN=64.
-Additionally, when HSXLEN=32, {\tt henvcfgh} is a 32-bit read/write register that
-contains the same fields as bits 63:32 of {\tt henvcfg} when
-HSXLEN=64.
-Register {\tt henvcfgh} does not exist when HSXLEN=64.
-
-\subsection{Hypervisor Counter-Enable Register ({\tt hcounteren})}
-
-The counter-enable register {\tt hcounteren} is a 32-bit register that
-controls the availability of the hardware performance monitoring counters
-to the guest virtual machine.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{cccMcccccc}
-\instbit{31} &
-\instbit{30} &
-\instbit{29} &
-\instbitrange{28}{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{HPM31} &
-\multicolumn{1}{c|}{HPM30} &
-\multicolumn{1}{c|}{HPM29} &
-\multicolumn{1}{c|}{...} &
-\multicolumn{1}{c|}{HPM5} &
-\multicolumn{1}{c|}{HPM4} &
-\multicolumn{1}{c|}{HPM3} &
-\multicolumn{1}{c|}{IR} &
-\multicolumn{1}{c|}{TM} &
-\multicolumn{1}{c|}{CY} \\
-\hline
-1 & 1 & 1 & 23 & 1 & 1 & 1 & 1 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor counter-enable register ({\tt hcounteren}).}
-\label{hcounteren}
-\end{figure*}
-
-When the CY, TM, IR, or HPM{\em n} bit in the {\tt hcounteren} register
-is clear, attempts to read the {\tt cycle}, {\tt time}, {\tt instret}, or
-{\tt hpmcounter}{\em n} register while V=1 will cause a virtual
-instruction exception if the same bit in {\tt mcounteren} is~1.
-When one of these bits is set, access to the corresponding register is
-permitted when V=1, unless prevented for some other reason.
-In VU-mode, a counter is not readable unless the applicable bits are set
-in both {\tt hcounteren} and {\tt scounteren}.
-
-{\tt hcounteren} must be implemented.
-However, any of the bits may be read-only zero,
-indicating that reads of the corresponding counter will cause an exception
-when V=1.
-Hence, they are effectively \warl\ fields.
-
-\subsection{Hypervisor Time Delta Registers ({\tt htimedelta}, {\tt htimedeltah})}
-
-The {\tt htimedelta} CSR is a read/write register that contains the delta
-between the value of the {\tt time} CSR and the value returned in VS-mode or
-VU-mode.
-That is, reading the {\tt time} CSR in VS or VU mode returns the sum of the
-contents of {\tt htimedelta} and the actual value of {\tt time}.
-
-\begin{commentary}
-Because overflow is ignored when summing {\tt htimedelta} and {\tt time},
-large values of {\tt htimedelta} may be used to represent negative time
-offsets.
-\end{commentary}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{63}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt htimedelta} \\
-\hline
-64 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor time delta register, HSXLEN=64.}
-\label{hdeltareg}
-\end{figure*}
-
-For HSXLEN=32 only, {\tt htimedelta} holds the lower 32 bits of the
-delta, and {\tt htimedeltah} holds the upper 32 bits of the delta.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{31}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt htimedelta} \\
-\hline
-\multicolumn{1}{|c|}{\tt htimedeltah} \\
-\hline
-32 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor time delta registers, HSXLEN=32.}
-\label{hdeltahreg}
-\end{figure*}
-
-\subsection{Hypervisor Trap Value Register ({\tt htval})}
-
-The {\tt htval} register is an HSXLEN-bit read/write register formatted
-as shown in Figure~\ref{htvalreg}.
-When a trap is taken into HS-mode, {\tt htval} is written with additional
-exception-specific information, alongside {\tt stval}, to assist software
-in handling the trap.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{HSXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt htval} \\
-\hline
-HSXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor trap value register ({\tt htval}).}
-\label{htvalreg}
-\end{figure*}
-
-When a guest-page-fault trap is taken into HS-mode, {\tt htval} is
-written with either zero or the guest physical address that faulted,
-shifted right by 2~bits.
-For other traps, {\tt htval} is set to zero, but a future standard or
-extension may redefine {\tt htval}'s setting for other traps.
-
-A guest-page fault may arise due to an implicit memory access during
-first-stage (VS-stage) address translation, in which case a guest
-physical address written to {\tt htval} is that of the implicit memory
-access that faulted---for example, the address of a VS-level page table
-entry that could not be read.
-(The guest physical address corresponding to the original virtual address
-is unknown when VS-stage translation fails to complete.)
-Additional information is provided in CSR {\tt htinst} to disambiguate
-such situations.
-
-Otherwise, for misaligned loads and stores that cause guest-page faults,
-a nonzero guest physical address in {\tt htval} corresponds to the
-faulting portion of the access as indicated by the virtual address in
-{\tt stval}.
-For instruction guest-page faults on systems with variable-length
-instructions, a nonzero {\tt htval} corresponds to the faulting portion
-of the instruction as indicated by the virtual address in {\tt stval}.
-
-\begin{commentary}
-A guest physical address written to {\tt htval} is shifted right by
-2~bits to accommodate addresses wider than the current XLEN.
-For RV32, the hypervisor extension permits guest physical addresses as
-wide as 34 bits, and {\tt htval} reports bits 33:2 of the address.
-This shift-by-2 encoding of guest physical addresses matches the encoding
-of physical addresses in PMP address registers (Section~\ref{sec:pmp})
-and in page table entries (Sections \ref{sec:sv32}, \ref{sec:sv39},
-\ref{sec:sv48}, and~\ref{sec:sv57}).
-
-If the least-significant two bits of a faulting guest physical address
-are needed, these bits are ordinarily the same as the least-significant
-two bits of the faulting virtual address in {\tt stval}.
-For faults due to implicit memory accesses for VS-stage address
-translation, the least-significant two bits are instead zeros.
-These cases can be distinguished using the value provided in register
-{\tt htinst}.
-\end{commentary}
-
-{\tt htval} is a \warl\ register that must be able to hold zero and may
-be capable of holding only an arbitrary subset of other 2-bit-shifted
-guest physical addresses, if any.
-
-\begin{commentary}
-Unless it has reason to assume otherwise (such as a platform standard),
-software that writes a value to {\tt htval} should read back from
-{\tt htval} to confirm the stored value.
-\end{commentary}
-
-\subsection{Hypervisor Trap Instruction Register ({\tt htinst})}
-
-The {\tt htinst} register is an HSXLEN-bit read/write register formatted
-as shown in Figure~\ref{htinstreg}.
-When a trap is taken into HS-mode, {\tt htinst} is written with a value
-that, if nonzero, provides information about the instruction that
-trapped, to assist software in handling the trap.
-The values that may be written to {\tt htinst} on a trap are documented
-in Section~\ref{sec:tinst-vals}.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{HSXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt htinst} \\
-\hline
-HSXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor trap instruction register ({\tt htinst}).}
-\label{htinstreg}
-\end{figure*}
-
-{\tt htinst} is a \warl\ register that need only be able to hold the
-values that the implementation may automatically write to it on a trap.
-
-\subsection{Hypervisor Guest Address Translation and Protection Register ({\tt hgatp})}
-\label{sec:hgatp}
-
-The {\tt hgatp} register is an HSXLEN-bit read/write register, formatted as
-shown in Figure~\ref{rv32hgatp} for HSXLEN=32 and Figure~\ref{rv64hgatp} for
-HSXLEN=64, which controls G-stage address translation and protection, the
-second stage of two-stage translation for guest virtual addresses (see
-Section~\ref{sec:two-stage-translation}).
-Similar to CSR {\tt satp}, this register holds the physical page number (PPN)
-of the guest-physical root page table; a virtual machine identifier (VMID),
-which facilitates address-translation fences on a per-virtual-machine basis;
-and the MODE field, which selects the address-translation scheme for guest
-physical addresses.
-When {\tt mstatus}.TVM=1, attempts to read or write {\tt hgatp} while executing
-in HS-mode will raise an illegal instruction exception.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{cY@{}E@{}K}
-\instbit{31} &
-\instbitrange{30}{29} &
-\instbitrange{28}{22} &
-\instbitrange{21}{0} \\
-\hline
-\multicolumn{1}{|c|}{MODE} &
-\multicolumn{1}{c|}{0 (\warl)} &
-\multicolumn{1}{c|}{VMID (\warl)} &
-\multicolumn{1}{c|}{PPN (\warl)} \\
-\hline
-1 & 2 & 7 & 22 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor guest address translation and protection register
-{\tt hgatp} when HSXLEN=32.}
-\label{rv32hgatp}
-\end{figure}
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}S@{}Y@{}E@{}K}
-\instbitrange{63}{60} &
-\instbitrange{59}{58} &
-\instbitrange{57}{44} &
-\instbitrange{43}{0} \\
-\hline
-\multicolumn{1}{|c|}{MODE (\warl)} &
-\multicolumn{1}{c|}{0 (\warl)} &
-\multicolumn{1}{c|}{VMID (\warl)} &
-\multicolumn{1}{c|}{PPN (\warl)} \\
-\hline
-4 & 2 & 14 & 44 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hypervisor guest address translation and protection register
-{\tt hgatp} when HSXLEN=64, for MODE values Bare, Sv39x4, Sv48x4, and Sv57x4.}
-\label{rv64hgatp}
-\end{figure}
-
-Table~\ref{tab:hgatp-mode} shows the encodings of the MODE field when HSXLEN=32 and
-HSXLEN=64.
-When MODE=Bare, guest physical addresses are equal to supervisor physical
-addresses, and there is no further memory protection for a guest virtual
-machine beyond the physical memory protection scheme described in
-Section~\ref{sec:pmp}.
-In this case, the remaining fields in {\tt hgatp} must be set to zeros.
-
-When HSXLEN=32, the only other valid setting for MODE is Sv32x4, which is a
-modification of the usual Sv32 paged virtual-memory scheme, extended to support
-34-bit guest physical addresses.
-When HSXLEN=64, modes Sv39x4, Sv48x4, and Sv57x4 are defined as modifications of the
-Sv39, Sv48, and Sv57 paged virtual-memory schemes.
-All of these paged virtual-memory schemes are described in
-Section~\ref{sec:guest-addr-translation}.
-
-The remaining MODE settings when HSXLEN=64 are reserved for future use and may define
-different interpretations of the other fields in {\tt hgatp}.
-
-\begin{table}[h]
-\begin{center}
-\begin{tabular}{|c|c|l|}
-\hline
-\multicolumn{3}{|c|}{HSXLEN=32} \\
-\hline
-Value & Name & Description \\
-\hline
-0 & Bare & No translation or protection. \\
-1 & Sv32x4 & Page-based 34-bit virtual addressing (2-bit extension of Sv32). \\
-\hline \hline
-\multicolumn{3}{|c|}{HSXLEN=64} \\
-\hline
-Value & Name & Description \\
-\hline
-0 & Bare & No translation or protection. \\
-1--7 & --- & {\em Reserved} \\
-8 & Sv39x4 & Page-based 41-bit virtual addressing (2-bit extension of Sv39). \\
-9 & Sv48x4 & Page-based 50-bit virtual addressing (2-bit extension of Sv48). \\
-10 & Sv57x4 & Page-based 59-bit virtual addressing (2-bit extension of Sv57). \\
-11--15 & --- & {\em Reserved} \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Encoding of {\tt hgatp} MODE field.}
-\label{tab:hgatp-mode}
-\end{table}
-
-Implementations are not required to support all defined MODE
-settings when HSXLEN=64.
-
-A write to {\tt hgatp} with an unsupported MODE value is not ignored as it is
-for {\tt satp}.
-Instead, the fields of {\tt hgatp} are {\warl} in the normal way, when so
-indicated.
-
-As explained in Section~\ref{sec:guest-addr-translation}, for the paged
-virtual-memory schemes (Sv32x4, Sv39x4, Sv48x4, and Sv57x4), the root page table is
-16~KiB and must be aligned to a 16-KiB boundary.
-In these modes, the lowest two bits of the physical page number (PPN) in
-{\tt hgatp} always read as zeros.
-An implementation that supports only the defined paged virtual-memory schemes
-and/or Bare may make PPN[1:0] read-only zero.
-
-The number of VMID bits is \unspecified\ and may be zero.
-The number of implemented VMID bits, termed {\mbox {\em VMIDLEN}}, may be
-determined by writing one to every bit position in the VMID field, then reading
-back the value in {\tt hgatp} to see which bit positions in the VMID field hold
-a one.
-The least-significant bits of VMID are implemented first:
-that is, if VMIDLEN~$>$~0, VMID[VMIDLEN-1:0] is writable.
-The maximal value of VMIDLEN, termed VMIDMAX, is 7 for Sv32x4 or 14 for Sv39x4,
-Sv48x4, and Sv57x4.
-
-The {\tt hgatp} register is considered {\em active} for the purposes of the
-address-translation algorithm {\em unless} the effective privilege mode is U
-and {\tt hstatus}.HU=0.
-
-\begin{commentary}
-This definition simplifies the implementation of speculative execution of
-HLV, HLVX, and HSV instructions.
-\end{commentary}
-
-Note that writing {\tt hgatp} does not imply any ordering constraints between
-page-table updates and subsequent G-stage address translations.
-If the new virtual machine's guest physical page tables have been modified,
-or if a VMID is reused,
-it may be necessary to execute an HFENCE.GVMA instruction
-(see Section~\ref{sec:hfence.vma}) before or after writing {\tt hgatp}.
-
-\subsection{Virtual Supervisor Status Register ({\tt vsstatus})}
-
-The {\tt vsstatus} register is a VSXLEN-bit read/write register that is
-VS-mode's version of supervisor register {\tt sstatus}, formatted as
-shown in Figure~\ref{vsstatusreg-rv32} when VSXLEN=32 and
-Figure~\ref{vsstatusreg} when VSXLEN=64.
-When V=1, {\tt vsstatus} substitutes for the usual {\tt sstatus}, so
-instructions that normally read or modify {\tt sstatus} actually access
-{\tt vsstatus} instead.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{cEcccc}
-\\
-\instbit{31} &
-\instbitrange{30}{20} &
-\instbit{19} &
-\instbit{18} &
-\instbit{17} &
- \\
-\hline
-\multicolumn{1}{|c|}{SD} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{MXR} &
-\multicolumn{1}{c|}{SUM} &
-\multicolumn{1}{c|}{\wpri} &
- \\
-\hline
-1 & 11 & 1 & 1 & 1 & \\
-\end{tabular}
-\begin{tabular}{cWWWWccccWcc}
-\\
-&
-\instbitrange{16}{15} &
-\instbitrange{14}{13} &
-\instbitrange{12}{11} &
-\instbitrange{10}{9} &
-\instbit{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbitrange{4}{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
- &
-\multicolumn{1}{|c|}{XS[1:0]} &
-\multicolumn{1}{c|}{FS[1:0]} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{VS[1:0]} &
-\multicolumn{1}{c|}{SPP} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{UBE} &
-\multicolumn{1}{c|}{SPIE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{SIE} &
-\multicolumn{1}{c|}{\wpri} \\
-\hline
- & 2 & 2 & 2 & 2 & 1 & 1 & 1 & 1 & 3 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Virtual supervisor status register ({\tt vsstatus}) when VSXLEN=32.}
-\label{vsstatusreg-rv32}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{cMFScccc}
-\\
-\instbit{VSXLEN-1} &
-\instbitrange{VSXLEN-2}{34} &
-\instbitrange{33}{32} &
-\instbitrange{31}{20} &
-\instbit{19} &
-\instbit{18} &
-\instbit{17} &
- \\
-\hline
-\multicolumn{1}{|c|}{SD} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{UXL[1:0]} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{MXR} &
-\multicolumn{1}{c|}{SUM} &
-\multicolumn{1}{c|}{\wpri} &
- \\
-\hline
-1 & VSXLEN-35 & 2 & 12 & 1 & 1 & 1 & \\
-\end{tabular}
-\begin{tabular}{cWWWWccccWcc}
-\\
-&
-\instbitrange{16}{15} &
-\instbitrange{14}{13} &
-\instbitrange{12}{11} &
-\instbitrange{10}{9} &
-\instbit{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbitrange{4}{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
- &
-\multicolumn{1}{|c|}{XS[1:0]} &
-\multicolumn{1}{c|}{FS[1:0]} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{VS[1:0]} &
-\multicolumn{1}{c|}{SPP} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{UBE} &
-\multicolumn{1}{c|}{SPIE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{SIE} &
-\multicolumn{1}{c|}{\wpri} \\
-\hline
- & 2 & 2 & 2 & 2 & 1 & 1 & 1 & 1 & 3 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Virtual supervisor status register ({\tt vsstatus}) when VSXLEN=64.}
-\label{vsstatusreg}
-\end{figure*}
-
-The UXL field controls the effective XLEN for VU-mode, which may differ
-from the XLEN for VS-mode (VSXLEN).
-When VSXLEN=32, the UXL field does not exist, and VU-mode XLEN=32.
-When VSXLEN=64, UXL is a \warl\ field that is encoded the same as the MXL
-field of {\tt misa}, shown in Table~\ref{misabase} on
-page~\pageref{misabase}.
-In particular, an implementation may make UXL be a read-only copy of
-field VSXL of {\tt hstatus}, forcing VU-mode XLEN=VSXLEN.
-
-If VSXLEN is changed from 32 to a wider width, and if field UXL is not
-restricted to a single value, it gets the value corresponding to the
-widest supported width not wider than the new VSXLEN.
-
-When V=1, both {\tt vsstatus}.FS and the HS-level {\tt sstatus}.FS are in
-effect. Attempts
-to execute a floating-point instruction when either field is 0 (Off) raise an
-illegal-instruction exception. Modifying the floating-point state when V=1
-causes both fields to be set to 3 (Dirty).
-
-\begin{commentary}
-For a hypervisor to benefit from the extension context status, it must
-have its own copy in the HS-level {\tt sstatus}, maintained independently
-of a guest OS running in VS-mode.
-While a version of the extension context status obviously must exist in
-{\tt vsstatus} for VS-mode, a hypervisor cannot rely on this version
-being maintained correctly, given that VS-level software can change
-{\tt vsstatus}.FS arbitrarily.
-If the HS-level {\tt sstatus}.FS were not independently active and
-maintained by the hardware in parallel with {\tt vsstatus}.FS while V=1,
-hypervisors would always be forced to conservatively swap all
-floating-point state when context-switching between virtual machines.
-\end{commentary}
-
-Similarly, when V=1, both {\tt vsstatus}.VS and the HS-level {\tt sstatus}.VS
-are in effect.
-Attempts to execute a vector instruction when either field is 0 (Off) raise an
-illegal-instruction exception.
-Modifying the vector state when V=1 causes both fields to be set to 3 (Dirty).
-
-Read-only fields SD and XS summarize the extension context status as it
-is visible to VS-mode only.
-For example, the value of the HS-level {\tt sstatus}.FS does not affect
-{\tt vsstatus}.SD.
-
-An implementation may make field UBE be a read-only copy of
-{\tt hstatus}.VSBE.
-
-When V=0, {\tt vsstatus} does not directly affect the behavior of the machine,
-unless a virtual-machine load/store (HLV, HLVX, or HSV)
-or the MPRV feature in the {\tt mstatus}
-register is used to execute a load or store
-{\em as though} V=1.
-
-\subsection{Virtual Supervisor Interrupt Registers ({\tt vsip} and {\tt vsie})}
-
-The {\tt vsip} and {\tt vsie} registers are VSXLEN-bit read/write
-registers that are VS-mode's versions of supervisor CSRs {\tt sip} and
-{\tt sie}, formatted as shown in Figures~\ref{vsipreg} and~\ref{vsiereg}
-respectively.
-When V=1, {\tt vsip} and {\tt vsie} substitute for the usual {\tt sip}
-and {\tt sie}, so instructions that normally read or modify
-{\tt sip}/{\tt sie} actually access {\tt vsip}/{\tt vsie} instead.
-However, interrupts directed to HS-level continue to be
-indicated in the HS-level {\tt sip} register, not in {\tt vsip}, when
-V=1.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{VSXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Interrupts (\warl)} \\
-\hline
-VSXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Virtual supervisor interrupt-pending register ({\tt vsip}).}
-\label{vsipreg}
-\end{figure}
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{VSXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Interrupts (\warl)} \\
-\hline
-VSXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Virtual supervisor interrupt-enable register ({\tt vsie}).}
-\label{vsiereg}
-\end{figure}
-
-The standard portions (bits 15:0) of registers {\tt vsip} and {\tt vsie}
-are formatted as shown in Figures~\ref{vsipreg-standard} and
-\ref{vsiereg-standard} respectively.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{ScFcFcc}
-\instbitrange{15}{10} &
-\instbit{9} &
-\instbitrange{8}{6} &
-\instbit{5} &
-\instbitrange{4}{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{0} &
-\multicolumn{1}{c|}{SEIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{STIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{SSIP} &
-\multicolumn{1}{c|}{0} \\
-\hline
-6 & 1 & 3 & 1 & 3 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Standard portion (bits 15:0) of {\tt vsip}.}
-\label{vsipreg-standard}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{ScFcFcc}
-\instbitrange{15}{10} &
-\instbit{9} &
-\instbitrange{8}{6} &
-\instbit{5} &
-\instbitrange{4}{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{0} &
-\multicolumn{1}{c|}{SEIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{STIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{SSIE} &
-\multicolumn{1}{c|}{0} \\
-\hline
-6 & 1 & 3 & 1 & 3 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Standard portion (bits 15:0) of {\tt vsie}.}
-\label{vsiereg-standard}
-\end{figure*}
-
-When bit 10 of {\tt hideleg} is zero, {\tt vsip}.SEIP and {\tt vsie}.SEIE
-are read-only zeros.
-Else, {\tt vsip}.SEIP and {\tt vsie}.SEIE are aliases of {\tt hip}.VSEIP
-and {\tt hie}.VSEIE.
-
-When bit 6 of {\tt hideleg} is zero, {\tt vsip}.STIP and {\tt vsie}.STIE
-are read-only zeros.
-Else, {\tt vsip}.STIP and {\tt vsie}.STIE are aliases of {\tt hip}.VSTIP
-and {\tt hie}.VSTIE.
-
-When bit 2 of {\tt hideleg} is zero, {\tt vsip}.SSIP and {\tt vsie}.SSIE
-are read-only zeros.
-Else, {\tt vsip}.SSIP and {\tt vsie}.SSIE are aliases of {\tt hip}.VSSIP
-and {\tt hie}.VSSIE.
-
-\subsection{Virtual Supervisor Trap Vector Base Address Register ({\tt vstvec})}
-
-The {\tt vstvec} register is a VSXLEN-bit read/write register that is
-VS-mode's version of supervisor register {\tt stvec}, formatted as shown
-in Figure~\ref{vstvecreg}.
-When V=1, {\tt vstvec} substitutes for the usual {\tt stvec}, so
-instructions that normally read or modify {\tt stvec} actually access
-{\tt vstvec} instead.
-When V=0, {\tt vstvec} does not directly affect the behavior of the
-machine.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{J@{}R}
-\instbitrange{VSXLEN-1}{2} &
-\instbitrange{1}{0} \\
-\hline
-\multicolumn{1}{|c|}{BASE[VSXLEN-1:2] (\warl)} &
-\multicolumn{1}{c|}{MODE (\warl)} \\
-\hline
-VSXLEN-2 & 2 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Virtual supervisor trap vector base address register ({\tt vstvec}).}
-\label{vstvecreg}
-\end{figure*}
-
-\subsection{Virtual Supervisor Scratch Register ({\tt vsscratch})}
-
-The {\tt vsscratch} register is a VSXLEN-bit read/write register that is
-VS-mode's version of supervisor register {\tt sscratch}, formatted as
-shown in Figure~\ref{vsscratchreg}.
-When V=1, {\tt vsscratch} substitutes for the usual {\tt sscratch}, so
-instructions that normally read or modify {\tt sscratch} actually access
-{\tt vsscratch} instead.
-The contents of {\tt vsscratch} never directly affect the behavior of
-the machine.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{VSXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt vsscratch} \\
-\hline
-VSXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Virtual supervisor scratch register ({\tt vsscratch}).}
-\label{vsscratchreg}
-\end{figure*}
-
-\subsection{Virtual Supervisor Exception Program Counter ({\tt vsepc})}
-
-The {\tt vsepc} register is a VSXLEN-bit read/write register that is
-VS-mode's version of supervisor register {\tt sepc}, formatted as shown
-in Figure~\ref{vsepcreg}.
-When V=1, {\tt vsepc} substitutes for the usual {\tt sepc}, so
-instructions that normally read or modify {\tt sepc} actually access
-{\tt vsepc} instead.
-When V=0, {\tt vsepc} does not directly affect the behavior of the
-machine.
-
-{\tt vsepc} is a \warl\ register that must be able to hold the same set of
-values that {\tt sepc} can hold.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{VSXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt vsepc} \\
-\hline
-VSXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Virtual supervisor exception program counter ({\tt vsepc}).}
-\label{vsepcreg}
-\end{figure*}
-
-\subsection{Virtual Supervisor Cause Register ({\tt vscause})}
-
-The {\tt vscause} register is a VSXLEN-bit read/write register that is
-VS-mode's version of supervisor register {\tt scause}, formatted as shown
-in Figure~\ref{vscausereg}.
-When V=1, {\tt vscause} substitutes for the usual {\tt scause}, so
-instructions that normally read or modify {\tt scause} actually access
-{\tt vscause} instead.
-When V=0, {\tt vscause} does not directly affect the behavior of the
-machine.
-
-{\tt vscause} is a \wlrl\ register that must be able to hold the same set of
-values that {\tt scause} can hold.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{c@{}U}
-\instbit{VSXLEN-1} &
-\instbitrange{VSXLEN-2}{0} \\
-\hline
-\multicolumn{1}{|c|}{Interrupt} &
-\multicolumn{1}{c|}{Exception Code (\wlrl)} \\
-\hline
-1 & VSXLEN-1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Virtual supervisor cause register ({\tt vscause}).}
-\label{vscausereg}
-\end{figure*}
-
-\subsection{Virtual Supervisor Trap Value Register ({\tt vstval})}
-
-The {\tt vstval} register is a VSXLEN-bit read/write register that is
-VS-mode's version of supervisor register {\tt stval}, formatted as shown
-in Figure~\ref{vstvalreg}.
-When V=1, {\tt vstval} substitutes for the usual {\tt stval}, so
-instructions that normally read or modify {\tt stval} actually access
-{\tt vstval} instead.
-When V=0, {\tt vstval} does not directly affect the behavior of the
-machine.
-
-{\tt vstval} is a \warl\ register that must be able to hold the same set of
-values that {\tt stval} can hold.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{VSXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt vstval} \\
-\hline
-VSXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Virtual supervisor trap value register ({\tt vstval}).}
-\label{vstvalreg}
-\end{figure*}
-
-\subsection{Virtual Supervisor Address Translation and Protection Register ({\tt vsatp})}
-
-The {\tt vsatp} register is a VSXLEN-bit read/write register that is
-VS-mode's version of supervisor register {\tt satp}, formatted as shown
-in Figure~\ref{rv32vsatpreg} for VSXLEN=32 and Figure~\ref{rv64vsatpreg}
-for VSXLEN=64.
-When V=1, {\tt vsatp} substitutes for the usual {\tt satp}, so
-instructions that normally read or modify {\tt satp} actually access
-{\tt vsatp} instead.
-{\tt vsatp} controls VS-stage address translation, the first stage of
-two-stage translation for guest virtual addresses (see
-Section~\ref{sec:two-stage-translation}).
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{c@{}E@{}K}
-\instbit{31} &
-\instbitrange{30}{22} &
-\instbitrange{21}{0} \\
-\hline
-\multicolumn{1}{|c|}{MODE (\warl)} &
-\multicolumn{1}{c|}{ASID (\warl)} &
-\multicolumn{1}{c|}{PPN (\warl)} \\
-\hline
-1 & 9 & 22 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Virtual supervisor address translation and protection register {\tt vsatp} when VSXLEN=32.}
-\label{rv32vsatpreg}
-\end{figure}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}S@{}T@{}U}
-\instbitrange{63}{60} &
-\instbitrange{59}{44} &
-\instbitrange{43}{0} \\
-\hline
-\multicolumn{1}{|c|}{MODE (\warl)} &
-\multicolumn{1}{c|}{ASID (\warl)} &
-\multicolumn{1}{c|}{PPN (\warl)} \\
-\hline
-4 & 16 & 44 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Virtual supervisor address translation and protection register {\tt vsatp} when VSXLEN=64, for MODE
-values Bare, Sv39, Sv48, and Sv57.}
-\label{rv64vsatpreg}
-\end{figure*}
-
-The {\tt vsatp} register is considered {\em active} for the purposes of the
-address-translation algorithm {\em unless} the effective privilege mode is U
-and {\tt hstatus}.HU=0.
-However, even when {\tt vsatp} is active, VS-stage page-table entries' A bits
-must not be set as a result of speculative execution, unless the effective
-privilege mode is VS or VU.
-
-\begin{commentary}
-In particular, virtual-machine load/store (HLV, HLVX, or HSV) instructions
-that are misspeculatively executed must not cause VS-stage A bits to be set.
-\end{commentary}
-
-When V=0, a write to {\tt vsatp} with an unsupported MODE value is either
-ignored as it is for {\tt satp}, or the fields of {\tt vsatp} are treated as {\warl} in the normal way.
-However, when V=1, a write to {\tt satp} with an unsupported MODE value
-{\em is} ignored and no write to {\tt vsatp} is effected.
-
-When V=0, {\tt vsatp} does not directly affect the behavior of the machine,
-unless a virtual-machine load/store (HLV, HLVX, or HSV)
-or the MPRV feature in the {\tt mstatus}
-register is used to execute a load or store
-{\em as though} V=1.
-
-\section{Hypervisor Instructions}
-
-The hypervisor extension adds virtual-machine load and store instructions
-and two privileged fence instructions.
-
-\subsection{Hypervisor Virtual-Machine Load and Store Instructions}
-
-\vspace{-0.2in}
-\begin{center}
-\begin{tabular}{@{}O@{}R@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{25} &
-\instbitrange{24}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{funct7} &
-\multicolumn{1}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-7 & 5 & 5 & 3 & 5 & 7 \\
-HLV.\textit{width} & [U] & addr & PRIVM & dest & SYSTEM \\
-HLVX.HU/WU & HLVX & addr & PRIVM & dest & SYSTEM \\
-HSV.\textit{width} & src & addr & PRIVM & 0 & SYSTEM \\
-\end{tabular}
-\end{center}
-
-The hypervisor virtual-machine load and store instructions are valid only
-in M-mode or HS-mode, or in U-mode when {\tt hstatus}.HU=1.
-Each instruction performs an explicit memory access as though V=1;
-i.e., with the address translation and protection, and the endianness,
-that apply to memory accesses in either VS-mode or VU-mode.
-Field SPVP of {\tt hstatus} controls the privilege level of the access.
-The explicit memory access is done as though in VU-mode when SPVP=0, and
-as though in VS-mode when SPVP=1.
-As usual when V=1, two-stage address translation is applied, and the
-HS-level {\tt sstatus}.SUM is ignored.
-HS-level {\tt sstatus}.MXR makes execute-only pages readable for
-both stages of address translation (VS-stage and G-stage), whereas
-{\tt vsstatus}.MXR affects only the first translation stage (VS-stage).
-
-For every RV32I or RV64I load instruction, LB, LBU, LH, LHU, LW, LWU,
-and LD, there is a corresponding virtual-machine load instruction:
-HLV.B, HLV.BU, HLV.H, HLV.HU, HLV.W, HLV.WU, and HLV.D.
-For every RV32I or RV64I store instruction, SB, SH, SW, and SD, there is
-a corresponding virtual-machine store instruction: HSV.B, HSV.H, HSV.W,
-and HSV.D.
-Instructions HLV.WU, HLV.D, and HSV.D are not valid for RV32, of course.
-
-Instructions HLVX.HU and HLVX.WU are the same as HLV.HU and HLV.WU,
-except that \textit{execute} permission takes the place of \textit{read}
-permission during address translation.
-That is, the memory being read must be executable in both stages of
-address translation, but read permission is not required.
-For the supervisor physical address that results from address
-translation, the supervisor physical memory attributes must grant both
-\textit{execute} and \textit{read} permissions.
-(The \textit{supervisor physical memory attributes} are the machine's
-physical memory attributes as modified by physical memory protection,
-Section~\ref{sec:pmp}, for supervisor level.)
-
-\begin{commentary}
-HLVX cannot override machine-level physical memory protection (PMP),
-so attempting to read memory that PMP designates as execute-only still
-results in an access-fault exception.
-
-Although HLVX instructions' explicit memory accesses require execute
-permissions, they still raise the same exceptions as other load instructions,
-rather than raising fetch exceptions instead.
-\end{commentary}
-
-HLVX.WU is valid for RV32, even though LWU and HLV.WU are not.
-(For RV32, HLVX.WU can be considered a variant of HLV.W, as sign
-extension is irrelevant for 32-bit values.)
-
-Attempts to execute a virtual-machine load/store instruction (HLV, HLVX,
-or HSV) when V=1 cause a virtual instruction trap.
-Attempts to execute one of these same instructions from U-mode when
-{\tt hstatus}.HU=0 cause an illegal instruction trap.
-
-\subsection{Hypervisor Memory-Management Fence Instructions}
-\label{sec:hfence.vma}
-
-\vspace{-0.2in}
-\begin{center}
-\begin{tabular}{@{}O@{}R@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{25} &
-\instbitrange{24}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{funct7} &
-\multicolumn{1}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-7 & 5 & 5 & 3 & 5 & 7 \\
-HFENCE.VVMA & asid & vaddr & PRIV & 0 & SYSTEM \\
-HFENCE.GVMA & vmid & gaddr & PRIV & 0 & SYSTEM \\
-\end{tabular}
-\end{center}
-
-The hypervisor memory-management fence instructions, HFENCE.VVMA
-and HFENCE.GVMA, perform a function similar to SFENCE.VMA
-(Section~\ref{sec:sfence.vma}), except applying to the VS-level
-memory-management data structures controlled by CSR {\tt vsatp}
-(HFENCE.VVMA) or the guest-physical memory-management data structures
-controlled by CSR {\tt hgatp} (HFENCE.GVMA).
-Instruction SFENCE.VMA applies only to the memory-management data structures
-controlled by the current {\tt satp} (either the HS-level {\tt satp} when
-V=0 or {\tt vsatp} when V=1).
-
-HFENCE.VVMA is valid only in M-mode or HS-mode.
-Its effect is much the
-same as temporarily entering VS-mode and executing SFENCE.VMA.
-Executing an HFENCE.VVMA guarantees that any previous stores already visible
-to the current hart are ordered before all implicit reads by that
-hart done for VS-stage address translation for instructions that
-\begin{compactitem}
-\item
-are subsequent to the HFENCE.VVMA, and
-\item
-execute when {\tt hgatp}.VMID has the same setting as it did when HFENCE.VVMA
-executed.
-\end{compactitem}
-Implicit reads need not be ordered when {\tt hgatp}.VMID differs from its
-setting at the time HFENCE.VVMA executed.
-If operand {\em rs1}$\neq${\tt x0}, it specifies a single guest virtual
-address, and if operand {\em rs2}$\neq${\tt x0}, it specifies a single guest
-address-space identifier
-(ASID).
-
-\begin{commentary}
-An HFENCE.VVMA instruction applies only to a single virtual machine, identified
-by the setting of {\tt hgatp}.VMID when HFENCE.VVMA executes.
-\end{commentary}
-
-When {\em rs2}$\neq${\tt x0}, bits XLEN-1:ASIDMAX of the value held in {\em
-rs2} are reserved for future standard use. Until their use is defined by a
-standard extension, they should be zeroed by software and ignored
-by current implementations.
-Furthermore, if ASIDLEN~$<$~ASIDMAX, the implementation shall ignore bits
-ASIDMAX-1:ASIDLEN of the value held in {\em rs2}.
-
-\begin{commentary}
-Simpler implementations of HFENCE.VVMA can ignore the guest virtual address in
-{\em rs1} and the guest ASID value in {\em rs2}, as well as {\tt hgatp}.VMID,
-and always perform a global fence for the VS-level memory management of all
-virtual machines, or even a global fence for all memory-management data
-structures.
-\end{commentary}
-
-Neither {\tt mstatus}.TVM nor {\tt hstatus}.VTVM causes HFENCE.VVMA to
-trap.
-
-HFENCE.GVMA is valid only in HS-mode when {\tt mstatus}.TVM=0, or in
-M-mode (irrespective of {\tt mstatus}.TVM).
-Executing an HFENCE.GVMA instruction guarantees that any previous stores
-already visible to the current hart are ordered before all implicit
-reads by that hart done for G-stage address translation for instructions
-that follow the HFENCE.GVMA.
-If operand {\em rs1}$\neq${\tt x0}, it specifies a single guest physical
-address, shifted right by 2~bits, and if operand {\em rs2}$\neq${\tt x0}, it
-specifies a single virtual machine identifier (VMID).
-
-\begin{commentary}
-Conceptually, an implementation might contain two address-translation caches:
-one that maps guest virtual addresses to guest physical addresses, and another
-that maps guest physical addresses to supervisor physical addresses.
-HFENCE.GVMA need not flush the former cache, but it must flush entries from
-the latter cache that match the HFENCE.GVMA's address and VMID arguments.
-
-More commonly, implementations contain address-translation caches that map
-guest virtual addresses directly to supervisor physical addresses, removing
-a level of indirection.
-For such implementations, any entry whose guest virtual address maps to
-a guest physical address that matches the HFENCE.GVMA's address and VMID
-arguments must be flushed.
-Selectively flushing entries in this fashion requires tagging them with
-the guest physical address, which is costly, and so a common technique is
-to flush all entries that match the HFENCE.GVMA's VMID argument, regardless
-of the address argument.
-\end{commentary}
-
-\begin{commentary}
-As with a guest physical address written to {\tt htval} on a
-trap, a guest physical address specified in {\em rs1} is shifted
-right by 2~bits to accommodate addresses wider than the current XLEN.
-\end{commentary}
-
-When {\em rs2}$\neq${\tt x0}, bits XLEN-1:VMIDMAX of the value held in {\em
-rs2} are reserved for future standard use. Until their use is defined by a
-standard extension, they should be zeroed by software and ignored
-by current implementations.
-Furthermore, if VMIDLEN~$<$~VMIDMAX, the implementation shall ignore bits
-VMIDMAX-1:VMIDLEN of the value held in {\em rs2}.
-
-\begin{commentary}
-Simpler implementations of HFENCE.GVMA can ignore the guest physical address in
-{\em rs1} and the VMID value in {\em rs2} and always perform a global fence for
-the guest-physical memory management of all virtual machines, or even a global
-fence for all memory-management data structures.
-\end{commentary}
-
-If {\tt hgatp}.MODE is changed for a given VMID, an HFENCE.GVMA with
-{\em rs1}={\tt x0} (and {\em rs2} set to either {\tt x0} or the VMID) must
-be executed to order subsequent guest translations with the MODE
-change---even if the old MODE or new MODE is Bare.
-
-Attempts to execute HFENCE.VVMA or HFENCE.GVMA when V=1 cause a virtual
-instruction trap, while attempts to do the same in U-mode
-cause an illegal instruction trap.
-Attempting to execute HFENCE.GVMA in HS-mode when {\tt mstatus}.TVM=1
-also causes an illegal instruction trap.
-
-\section{Machine-Level CSRs}
-
-The hypervisor extension augments or modifies machine CSRs {\tt mstatus},
-{\tt mstatush}, {\tt mideleg}, {\tt mip}, and {\tt mie}, and
-adds CSRs {\tt mtval2} and {\tt mtinst}.
-
-\subsection{Machine Status Registers ({\tt mstatus} and {\tt mstatush})}
-
-The hypervisor extension adds two fields, MPV and GVA, to the
-machine-level {\tt mstatus} or {\tt mstatush} CSR, and modifies the
-behavior of several existing {\tt mstatus} fields.
-Figure~\ref{hypervisor-mstatus} shows the modified {\tt mstatus} register
-when the hypervisor extension is implemented and MXLEN=64.
-When MXLEN=32, the hypervisor extension adds MPV and GVA not to {\tt mstatus}
-but to {\tt mstatush}.
-Figure~\ref{hypervisor-mstatush} shows the {\tt mstatush} register when
-the hypervisor extension is implemented and MXLEN=32.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{cMccccFFc}
-\\
-\instbit{MXLEN-1} &
-\instbitrange{MXLEN-2}{40} &
-\instbit{39} &
-\instbit{38} &
-\instbit{37} &
-\instbit{36} &
-\instbitrange{35}{34} &
-\instbitrange{33}{32} &
- \\
-\hline
-\multicolumn{1}{|c|}{SD} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{MPV} &
-\multicolumn{1}{c|}{GVA} &
-\multicolumn{1}{c|}{MBE} &
-\multicolumn{1}{c|}{SBE} &
-\multicolumn{1}{c|}{SXL[1:0]} &
-\multicolumn{1}{c|}{UXL[1:0]} &
- \\
-\hline
-1 & MXLEN-41 & 1 & 1 & 1 & 1 & 2 & 2 & \\
-\end{tabular}
-\begin{tabular}{cEccccccWWc}
-\\
-&
-\instbitrange{31}{23} &
-\instbit{22} &
-\instbit{21} &
-\instbit{20} &
-\instbit{19} &
-\instbit{18} &
-\instbit{17} &
-\instbitrange{16}{15} &
-\instbitrange{14}{13} &
- \\
-\hline
- &
-\multicolumn{1}{|c|}{\wpri} &
-\multicolumn{1}{c|}{TSR} &
-\multicolumn{1}{c|}{TW} &
-\multicolumn{1}{c|}{TVM} &
-\multicolumn{1}{c|}{MXR} &
-\multicolumn{1}{c|}{SUM} &
-\multicolumn{1}{c|}{MPRV} &
-\multicolumn{1}{c|}{XS[1:0]} &
-\multicolumn{1}{c|}{FS[1:0]} &
- \\
-\hline
- & 9 & 1 & 1 & 1 & 1 & 1 & 1 & 2 & 2 & \\
-\end{tabular}
-\begin{tabular}{cFWcccccccccc}
-\\
-&
-\instbitrange{12}{11} &
-\instbitrange{10}{9} &
-\instbit{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
- &
-\multicolumn{1}{|c|}{MPP[1:0]} &
-\multicolumn{1}{c|}{VS[1:0]} &
-\multicolumn{1}{c|}{SPP} &
-\multicolumn{1}{c|}{MPIE} &
-\multicolumn{1}{c|}{UBE} &
-\multicolumn{1}{c|}{SPIE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{MIE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{SIE} &
-\multicolumn{1}{c|}{\wpri} \\
-\hline
- & 2 & 2 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine status register ({\tt mstatus}) for RV64 when the hypervisor extension is implemented.}
-\label{hypervisor-mstatus}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{LccccF}
-\\
-\instbitrange{31}{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbit{4} &
-\instbitrange{3}{0} \\
-\hline
-\multicolumn{1}{|c|}{\wpri} &
-\multicolumn{1}{c|}{MPV} &
-\multicolumn{1}{c|}{GVA} &
-\multicolumn{1}{c|}{MBE} &
-\multicolumn{1}{c|}{SBE} &
-\multicolumn{1}{c|}{\wpri} \\
-\hline
-24 & 1 & 1 & 1 & 1 & 4 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Additional machine status register ({\tt mstatush}) for RV32 when the hypervisor extension is implemented.
-The format of {\tt mstatus} is unchanged for RV32.}
-\label{hypervisor-mstatush}
-\end{figure*}
-
-The MPV bit (Machine Previous Virtualization Mode) is written by the implementation
-whenever a trap is taken into M-mode.
-Just as the MPP field is set to the (nominal) privilege
-mode at the time of the trap, the MPV bit is set to the value of the virtualization
-mode V at the time of the trap. When an MRET instruction is executed, the
-virtualization mode V is set to MPV, unless MPP=3, in which case V remains 0.
-
-Field GVA (Guest Virtual Address) is written by the implementation
-whenever a trap is taken into M-mode.
-For any trap (breakpoint, address misaligned,
-access fault, page fault, or guest-page fault) that writes
-a guest virtual address to {\tt mtval}, GVA is set to~1.
-For any other trap into M-mode, GVA is set to~0.
-
-The TSR and TVM fields of {\tt mstatus} affect execution only in HS-mode,
-not in VS-mode.
-The TW field affects execution in all modes except M-mode.
-
-Setting TVM=1 prevents HS-mode from accessing {\tt hgatp} or executing
-HFENCE.GVMA or HINVAL.GVMA, but has no effect on accesses to {\tt vsatp} or
-instructions HFENCE.VVMA or HINVAL.VVMA.
-
-\begin{commentary}
-TVM exists in {\tt mstatus} to allow machine-level software to modify
-the address translations managed by a supervisor-level OS, usually for
-the purpose of inserting another stage of address translation below
-that controlled by the OS.
-The instruction traps enabled by TVM=1 permit machine level
-to co-opt both {\tt satp} and {\tt hgatp} and substitute
-\emph{shadow page tables} that merge the OS's chosen page translations
-with M-level's lower-stage translations, all without the OS being
-aware.
-M-level software needs this ability not only to emulate the hypervisor
-extension if not already supported, but also to emulate any future
-\mbox{RISC-V} extensions that may modify or add address translation
-stages, perhaps, for example, to improve support for nested
-hypervisors, i.e., running hypervisors atop other hypervisors.
-
-However, setting TVM=1 does not cause traps for accesses to {\tt vsatp}
-or instructions HFENCE.VVMA or HINVAL.VVMA, or for any actions taken
-in VS-mode, because M-level software is not expected to need to involve
-itself in VS-stage address translation.
-For virtual machines, it should be sufficient, and in all likelihood
-faster as well, to leave VS-stage address translation alone and merge
-all other translation stages into G-stage shadow page tables controlled
-by {\tt hgatp}.
-This assumption does place some constraints on possible future
-\mbox{RISC-V} extensions that current machines will be able to emulate
-efficiently.
-\end{commentary}
-
-The hypervisor extension changes the behavior of the Modify Privilege field,
-MPRV, of {\tt mstatus}.
-When MPRV=0, translation and protection behave as normal.
-When MPRV=1, explicit memory accesses are translated and protected, and
-endianness is applied, as though the current virtualization mode were set
-to MPV and the current nominal privilege mode were set to MPP.
-Table~\ref{h-mprv} enumerates the cases.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|c|c|c||p{4.5in}|}
- \hline
- MPRV & MPV & MPP & Effect \\ \hline \hline
- 0 & -- & -- & Normal access; current privilege mode applies. \\ \hline
- 1 & 0 & 0 & U-level access with HS-level translation and protection only. \\ \hline
- 1 & 0 & 1 & HS-level access with HS-level translation and protection only. \\ \hline
- 1 & -- & 3 & M-level access with no translation. \\ \hline
- 1 & 1 & 0 & VU-level access with two-stage translation and protection. The HS-level MXR bit makes any executable page readable. {\tt vsstatus}.MXR makes readable those pages marked executable at the VS translation stage, but only if readable at the guest-physical translation stage. \\ \hline
- 1 & 1 & 1 & VS-level access with two-stage translation and protection. The HS-level MXR bit makes any executable page readable. {\tt vsstatus}.MXR makes readable those pages marked executable at the VS translation stage, but only if readable at the guest-physical translation stage. {\tt vsstatus}.SUM applies instead of the HS-level SUM bit. \\ \hline
- \end{tabular}
-\end{center}
-\caption{Effect of MPRV on the translation and protection of explicit
-memory accesses.}
-\label{h-mprv}
-\end{table*}
-
-MPRV does not affect the virtual-machine load/store instructions, HLV,
-HLVX, and HSV.
-The explicit loads and stores of these instructions always act as though
-V=1 and the nominal privilege mode were {\tt hstatus}.SPVP, overriding MPRV.
-
-The {\tt mstatus} register is a superset of the HS-level {\tt sstatus}
-register but is not a superset of {\tt vsstatus}.
-
-\FloatBarrier
-
-\subsection{Machine Interrupt Delegation Register ({\tt mideleg})}
-
-When the hypervisor extension is implemented, bits 10, 6, and 2 of
-{\tt mideleg} (corresponding to the standard VS-level interrupts) are
-each read-only one.
-Furthermore, if any guest external interrupts are implemented (GEILEN is
-nonzero), bit~12 of {\tt mideleg} (corresponding to supervisor-level
-guest external interrupts) is also read-only one.
-VS-level interrupts and guest external interrupts are always delegated
-past M-mode to HS-mode.
-
-For bits of {\tt mideleg} that are zero, the corresponding bits in
-{\tt hideleg}, {\tt hip}, and {\tt hie} are read-only zeros.
-
-\subsection{Machine Interrupt Registers ({\tt mip} and {\tt mie})}
-
-The hypervisor extension gives registers {\tt mip} and {\tt mie}
-additional active bits for the hypervisor-added interrupts.
-Figures~\ref{hypervisor-mipreg-standard} and
-\ref{hypervisor-miereg-standard} show the standard portions (bits 15:0)
-of registers {\tt mip} and {\tt mie} when the hypervisor extension is
-implemented.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{Yccccccccccccc}
-\instbitrange{15}{13} &
-\instbit{12} &
-\instbit{11} &
-\instbit{10} &
-\instbit{9} &
-\instbit{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{0} &
-\multicolumn{1}{c|}{SGEIP} &
-\multicolumn{1}{c|}{MEIP} &
-\multicolumn{1}{c|}{VSEIP} &
-\multicolumn{1}{c|}{SEIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{MTIP} &
-\multicolumn{1}{c|}{VSTIP} &
-\multicolumn{1}{c|}{STIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{MSIP} &
-\multicolumn{1}{c|}{VSSIP} &
-\multicolumn{1}{c|}{SSIP} &
-\multicolumn{1}{c|}{0} \\
-\hline
-3 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Standard portion (bits 15:0) of {\tt mip}.}
-\label{hypervisor-mipreg-standard}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{Yccccccccccccc}
-\instbitrange{15}{13} &
-\instbit{12} &
-\instbit{11} &
-\instbit{10} &
-\instbit{9} &
-\instbit{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{0} &
-\multicolumn{1}{c|}{SGEIE} &
-\multicolumn{1}{c|}{MEIE} &
-\multicolumn{1}{c|}{VSEIE} &
-\multicolumn{1}{c|}{SEIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{MTIE} &
-\multicolumn{1}{c|}{VSTIE} &
-\multicolumn{1}{c|}{STIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{MSIE} &
-\multicolumn{1}{c|}{VSSIE} &
-\multicolumn{1}{c|}{SSIE} &
-\multicolumn{1}{c|}{0} \\
-\hline
-3 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Standard portion (bits 15:0) of {\tt mie}.}
-\label{hypervisor-miereg-standard}
-\end{figure*}
-
-Bits SGEIP, VSEIP, VSTIP, and VSSIP in {\tt mip} are aliases for the same bits
-in hypervisor CSR {\tt hip}, while SGEIE, VSEIE, VSTIE, and VSSIE in {\tt mie}
-are aliases for the same bits in {\tt hie}.
-
-\subsection{Machine Second Trap Value Register ({\tt mtval2})}
-
-The {\tt mtval2} register is an MXLEN-bit read/write register formatted
-as shown in Figure~\ref{mtval2reg}.
-When a trap is taken into M-mode, {\tt mtval2} is written with additional
-exception-specific information, alongside {\tt mtval}, to assist software
-in handling the trap.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt mtval2} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine second trap value register ({\tt mtval2}).}
-\label{mtval2reg}
-\end{figure*}
-
-When a guest-page-fault trap is taken into M-mode, {\tt mtval2} is
-written with either zero or the guest physical address that faulted,
-shifted right by 2~bits.
-For other traps, {\tt mtval2} is set to zero, but a future standard or
-extension may redefine {\tt mtval2}'s setting for other traps.
-
-If a guest-page fault is due to an implicit memory access during
-first-stage (VS-stage) address translation, a guest physical address
-written to {\tt mtval2} is that of the implicit memory access that
-faulted.
-Additional information is provided in CSR {\tt mtinst} to disambiguate
-such situations.
-
-Otherwise, for misaligned loads and stores that cause guest-page faults,
-a nonzero guest physical address in {\tt mtval2} corresponds to the
-faulting portion of the access as indicated by the virtual address in
-{\tt mtval}.
-For instruction guest-page faults on systems with variable-length
-instructions, a nonzero {\tt mtval2} corresponds to the faulting portion
-of the instruction as indicated by the virtual address in {\tt mtval}.
-
-{\tt mtval2} is a \warl\ register that must be able to hold zero and may
-be capable of holding only an arbitrary subset of other 2-bit-shifted
-guest physical addresses, if any.
-
-\subsection{Machine Trap Instruction Register ({\tt mtinst})}
-
-The {\tt mtinst} register is an MXLEN-bit read/write register formatted
-as shown in Figure~\ref{mtinstreg}.
-When a trap is taken into M-mode, {\tt mtinst} is written with a value
-that, if nonzero, provides information about the instruction that
-trapped, to assist software in handling the trap.
-The values that may be written to {\tt mtinst} on a trap are documented
-in Section~\ref{sec:tinst-vals}.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt mtinst} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine trap instruction register ({\tt mtinst}).}
-\label{mtinstreg}
-\end{figure*}
-
-{\tt mtinst} is a \warl\ register that need only be able to hold the
-values that the implementation may automatically write to it on a trap.
-
-\section{Two-Stage Address Translation}
-\label{sec:two-stage-translation}
-
-Whenever the current virtualization mode V is 1,
-two-stage address translation and protection is in
-effect.
-For any virtual memory access, the original virtual address is
-converted in the first stage
-by VS-level address translation, as controlled by the {\tt vsatp}
-register, into a {\em guest physical address}.
-The guest physical address is then converted
-in the second stage by guest physical address
-translation, as controlled by the {\tt hgatp} register, into a supervisor
-physical address.
-The two stages are known also as VS-stage and G-stage translation.
-Although there is no option to disable two-stage address translation when V=1,
-either stage of translation can be effectively disabled by zeroing the
-corresponding {\tt vsatp} or {\tt hgatp} register.
-
-The {\tt vsstatus} field MXR, which makes execute-only pages readable, only
-overrides VS-stage page protection.
-Setting MXR at VS-level does not override guest-physical page protections.
-Setting MXR at HS-level, however, overrides both VS-stage and G-stage
-execute-only permissions.
-
-When V=1, memory accesses that would normally bypass address translation are
-subject to G-stage address translation alone.
-This includes memory accesses made in support of VS-stage address translation,
-such as reads and writes of VS-level page tables.
-
-Machine-level physical memory protection applies to supervisor physical
-addresses and is in effect regardless of virtualization mode.
-
-\subsection{Guest Physical Address Translation}
-\label{sec:guest-addr-translation}
-
-The mapping of guest physical addresses to supervisor physical addresses is
-controlled by CSR {\tt hgatp} (Section~\ref{sec:hgatp}).
-
-When the address translation scheme selected by the MODE field of {\tt hgatp}
-is Bare, guest physical addresses are equal to supervisor physical addresses
-without modification, and no memory protection applies in the trivial
-translation of guest physical addresses to supervisor physical addresses.
-
-When {\tt hgatp}.MODE specifies a translation scheme of Sv32x4, Sv39x4,
-Sv48x4, or Sv57x4, G-stage address translation is a variation on the usual
-page-based virtual address translation scheme of Sv32, Sv39, Sv48, or Sv57,
-respectively.
-In each case, the size of the incoming address is widened by 2~bits (to 34, 41,
-50, or 59 bits).
-To accommodate the 2~extra bits, the root page table (only) is expanded by a
-factor of four to be 16~KiB instead of the usual 4~KiB.
-Matching its larger size, the root page table also must be aligned to a 16~KiB
-boundary instead of the usual 4~KiB page boundary.
-Except as noted, all other aspects of Sv32, Sv39, Sv48, or Sv57 are adopted
-unchanged for G-stage translation.
-Non-root page tables and all page table entries (PTEs) have the same formats as
-documented in Sections~\ref{sec:sv32}, \ref{sec:sv39}, \ref{sec:sv48},
-and~\ref{sec:sv57}.
-
-For Sv32x4, an incoming guest physical address is partitioned into a virtual
-page number (VPN) and page offset as shown in Figure~\ref{sv32x4va}.
-This partitioning is identical to that for an Sv32 virtual address as depicted
-in Figure~\ref{sv32va} (page~\pageref{sv32va}), except with 2 more bits at the
-high end in VPN[1].
-(Note that the fields of a partitioned guest physical address also correspond
-one-for-one with the structure that Sv32 assigns to a physical address,
-depicted in Figure~\ref{rv32va}.)
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}E@{}O@{}E}
-\instbitrange{33}{22} &
-\instbitrange{21}{12} &
-\instbitrange{11}{0} \\
-\hline
-\multicolumn{1}{|c|}{VPN[1]} &
-\multicolumn{1}{c|}{VPN[0]} &
-\multicolumn{1}{c|}{page offset} \\
-\hline
-12 & 10 & 12 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv32x4 virtual address (guest physical address).}
-\label{sv32x4va}
-\end{figure*}
-
-For Sv39x4, an incoming guest physical address is partitioned as shown in
-Figure~\ref{sv39x4va}.
-This partitioning is identical to that for an Sv39 virtual address as depicted
-in Figure~\ref{sv39va} (page~\pageref{sv39va}), except with 2 more bits at the
-high end in VPN[2].
-Address bits 63:41 must all be zeros, or else a guest-page-fault
-exception occurs.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}E@{}O@{}O@{}O}
-\instbitrange{40}{30} &
-\instbitrange{29}{21} &
-\instbitrange{20}{12} &
-\instbitrange{11}{0} \\
-\hline
-\multicolumn{1}{|c|}{VPN[2]} &
-\multicolumn{1}{c|}{VPN[1]} &
-\multicolumn{1}{c|}{VPN[0]} &
-\multicolumn{1}{c|}{page offset} \\
-\hline
-11 & 9 & 9 & 12 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv39x4 virtual address (guest physical address).}
-\label{sv39x4va}
-\end{figure*}
-
-For Sv48x4, an incoming guest physical address is partitioned as shown in
-Figure~\ref{sv48x4va}.
-This partitioning is identical to that for an Sv48 virtual address as depicted
-in Figure~\ref{sv48va} (page~\pageref{sv48va}), except with 2 more bits at the
-high end in VPN[3].
-Address bits 63:50 must all be zeros, or else a guest-page-fault
-exception occurs.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}E@{}O@{}O@{}O@{}O}
-\instbitrange{49}{39} &
-\instbitrange{38}{30} &
-\instbitrange{29}{21} &
-\instbitrange{20}{12} &
-\instbitrange{11}{0} \\
-\hline
-\multicolumn{1}{|c|}{VPN[3]} &
-\multicolumn{1}{c|}{VPN[2]} &
-\multicolumn{1}{c|}{VPN[1]} &
-\multicolumn{1}{c|}{VPN[0]} &
-\multicolumn{1}{c|}{page offset} \\
-\hline
-11 & 9 & 9 & 9 & 12 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv48x4 virtual address (guest physical address).}
-\label{sv48x4va}
-\end{figure*}
-
-For Sv57x4, an incoming guest physical address is partitioned as shown in
-Figure~\ref{sv57x4va}.
-This partitioning is identical to that for an Sv57 virtual address as depicted
-in Figure~\ref{sv57va} (page~\pageref{sv57va}), except with 2 more bits at the
-high end in VPN[4].
-Address bits 63:59 must all be zeros, or else a guest-page-fault
-exception occurs.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}S@{}R@{}R@{}R@{}R@{}S}
-\instbitrange{58}{48} &
-\instbitrange{47}{39} &
-\instbitrange{38}{30} &
-\instbitrange{29}{21} &
-\instbitrange{20}{12} &
-\instbitrange{11}{0} \\
-\hline
-\multicolumn{1}{|c|}{VPN[4]} &
-\multicolumn{1}{c|}{VPN[3]} &
-\multicolumn{1}{c|}{VPN[2]} &
-\multicolumn{1}{c|}{VPN[1]} &
-\multicolumn{1}{c|}{VPN[0]} &
-\multicolumn{1}{c|}{page offset} \\
-\hline
-11 & 9 & 9 & 9 & 9 & 12 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv57x4 virtual address (guest physical address).}
-\label{sv57x4va}
-\end{figure*}
-
-\begin{commentary}
-The page-based G-stage address translation scheme for RV32, Sv32x4, is
-defined to support a 34-bit guest physical address so that an RV32 hypervisor
-need not be limited in its ability to virtualize real 32-bit RISC-V machines,
-even those with 33-bit or 34-bit physical addresses.
-This may include the possibility of a machine virtualizing itself, if it
-happens to use 33-bit or 34-bit physical addresses.
-Multiplying the size and alignment of the root page table by a factor of four
-is the cheapest way to extend Sv32 to cover a 34-bit address.
-The possible wastage of 12~KiB for an unnecessarily large root page table is
-expected to be of negligible consequence for most (maybe all) real uses.
-
-A consistent ability to virtualize machines having as much as four times the
-physical address space as virtual address space is believed to be of some
-utility also for RV64.
-For a machine implementing 39-bit virtual addresses (Sv39), for example, this
-allows the hypervisor extension to support up to a 41-bit guest physical
-address space without either necessitating hardware support for 48-bit virtual
-addresses (Sv48) or falling back to emulating the larger address space using
-shadow page tables.
-\end{commentary}
-
-The conversion of an Sv32x4, Sv39x4, Sv48x4, or Sv57x4 guest physical address is
-accomplished with the same algorithm used for Sv32, Sv39, Sv48, or Sv57, as
-presented in Section~\ref{sv32algorithm}, except that:
-\begin{compactitem}
-\item
-{\tt hgatp} substitutes for the usual {\tt satp};
-\item
-for the translation to begin, the effective privilege mode must be VS-mode or
-VU-mode;
-\item
-when checking the U~bit, the current privilege mode is always taken to be
-U-mode; and
-\item
-guest-page-fault exceptions are raised instead of regular page-fault
-exceptions.
-\end{compactitem}
-
-For G-stage address translation, all memory accesses (including those
-made to access data structures for VS-stage address translation) are considered
-to be user-level accesses, as though executed in U-mode.
-Access type permissions---readable, writable, or executable---are checked
-during G-stage translation the same as for VS-stage
-translation.
-For a memory access made to support VS-stage address translation (such as to
-read/write a VS-level page table), permissions are checked as though for a load
-or store, not for the original access type.
-However, any exception is always reported for the original access type
-(instruction, load, or store/AMO).
-
-The G~bit in all G-stage PTEs is reserved for future standard use. Until its
-use is defined by a standard extension, it should be cleared
-by software for forward compatibility, and must be ignored by hardware.
-
-\begin{commentary}
-G-stage address translation uses the identical format for PTEs as
-regular address translation, even including the U~bit, due to the
-possibility of sharing some (or all) page tables between G-stage
-translation and regular HS-level address translation.
-Regardless of whether this usage will ever become common, we chose not to
-preclude it.
-\end{commentary}
-
-\subsection{Guest-Page Faults}
-
-Guest-page-fault traps may be delegated from M-mode to HS-mode under the
-control of CSR {\tt medeleg}, but cannot be delegated to other privilege
-modes.
-On a guest-page fault, CSR {\tt mtval} or {\tt stval} is written with the
-faulting guest virtual address as usual, and {\tt mtval2} or {\tt htval} is
-written either with zero or with the faulting guest physical address,
-shifted right by 2~bits.
-CSR {\tt mtinst} or {\tt htinst} may also be written with information
-about the faulting instruction or other reason for the access, as
-explained in Section~\ref{sec:tinst-vals}.
-
-When an instruction fetch or a misaligned memory access straddles a page
-boundary, two different address translations are involved.
-When a guest-page fault occurs in such a circumstance, the faulting
-virtual address written to {\tt mtval}/{\tt stval} is the same as would
-be required for a regular page fault.
-Thus, the faulting virtual address may be a page-boundary address that is
-higher than the instruction's original virtual address, if the byte at
-that page boundary is among the accessed bytes.
-
-When a guest-page fault is not due to an implicit
-memory access for VS-stage address translation,
-a nonzero guest physical address written to
-{\tt mtval2}/{\tt htval} shall correspond
-to the exact virtual address written to
-{\tt mtval}/{\tt stval}.
-
-\subsection{Memory-Management Fences}
-
-The behavior of the SFENCE.VMA instruction is affected by the current
-virtualization mode V. When V=0, the virtual-address argument is an HS-level
-virtual address, and the ASID argument is an HS-level ASID.
-The instruction orders stores only to HS-level address-translation structures
-with subsequent HS-level address translations.
-
-When V=1, the virtual-address argument to SFENCE.VMA is a guest virtual
-address within the current virtual machine, and the ASID argument is a VS-level
-ASID within the current virtual machine.
-The current virtual machine is identified by the VMID field of CSR {\tt hgatp},
-and the effective ASID can be considered to be the combination of this VMID
-with the VS-level ASID.
-The SFENCE.VMA instruction orders stores only to the VS-level
-address-translation structures with subsequent VS-stage address translations
-for the same virtual machine, i.e., only when {\tt hgatp}.VMID is the same as
-when the SFENCE.VMA executed.
-
-Hypervisor instructions HFENCE.VVMA and HFENCE.GVMA provide additional
-memory-management fences to complement SFENCE.VMA.
-These instructions are described in Section~\ref{sec:hfence.vma}.
-
-Section~\ref{pmp-vmem} discusses the intersection between physical memory
-protection (PMP) and page-based address translation.
-It is noted there that, when PMP settings are modified in a manner that affects
-either the physical memory that holds page tables or the physical memory to
-which page tables point, M-mode software must synchronize the PMP settings with
-the virtual memory system.
-For HS-level address translation, this is accomplished by executing in M-mode
-an SFENCE.VMA instruction with {\em rs1}={\tt x0} and {\em rs2}={\tt x0}, after
-the PMP CSRs are written.
-Synchronization with G-stage and VS-stage data structures is also needed.
-Executing an HFENCE.GVMA instruction with {\em rs1}={\tt x0} and
-{\em rs2}={\tt x0} suffices to flush all G-stage or VS-stage
-address-translation cache entries that have cached PMP settings
-corresponding to the final translated supervisor physical address.
-An HFENCE.VVMA instruction is not required.
-
-\section{Traps}
-
-\subsection{Trap Cause Codes}
-
-The hypervisor extension augments the trap cause encoding.
-Table~\ref{hcauses} lists the possible M-mode and HS-mode trap cause
-codes when the hypervisor extension is implemented.
-Codes are added for VS-level interrupts (interrupts 2, 6,~10), for
-supervisor-level guest external interrupts (interrupt~12), for virtual
-instruction exceptions (exception~22), and for guest-page faults
-(exceptions 20, 21,~23).
-Furthermore, environment calls from VS-mode are assigned cause 10,
-whereas those from HS-mode or S-mode use cause~9 as usual.
-
-\begin{table*}[p]
-\begin{center}
-\begin{tabular}{|r|r|l|}
- \hline
- Interrupt & Exception Code & Description \\
- \hline
- 1 & 0 & {\em Reserved} \\
- 1 & 1 & Supervisor software interrupt \\
- 1 & 2 & Virtual supervisor software interrupt \\
- 1 & 3 & Machine software interrupt \\ \hline
- 1 & 4 & {\em Reserved} \\
- 1 & 5 & Supervisor timer interrupt \\
- 1 & 6 & Virtual supervisor timer interrupt \\
- 1 & 7 & Machine timer interrupt \\ \hline
- 1 & 8 & {\em Reserved} \\
- 1 & 9 & Supervisor external interrupt \\
- 1 & 10 & Virtual supervisor external interrupt \\
- 1 & 11 & Machine external interrupt \\ \hline
- 1 & 12 & Supervisor guest external interrupt \\
- 1 & 13--15 & {\em Reserved} \\
- 1 & $\ge$16 & {\em Designated for platform or custom use} \\ \hline
- 0 & 0 & Instruction address misaligned \\
- 0 & 1 & Instruction access fault \\
- 0 & 2 & Illegal instruction \\
- 0 & 3 & Breakpoint \\
- 0 & 4 & Load address misaligned \\
- 0 & 5 & Load access fault \\
- 0 & 6 & Store/AMO address misaligned \\
- 0 & 7 & Store/AMO access fault \\
- 0 & 8 & Environment call from U-mode or VU-mode \\
- 0 & 9 & Environment call from HS-mode \\
- 0 & 10 & Environment call from VS-mode \\
- 0 & 11 & Environment call from M-mode \\
- 0 & 12 & Instruction page fault \\
- 0 & 13 & Load page fault \\
- 0 & 14 & {\em Reserved} \\
- 0 & 15 & Store/AMO page fault \\
- 0 & 16--19 & {\em Reserved} \\
- 0 & 20 & Instruction guest-page fault \\
- 0 & 21 & Load guest-page fault \\
- 0 & 22 & Virtual instruction \\
- 0 & 23 & Store/AMO guest-page fault \\
- 0 & 24--31 & {\em Designated for custom use} \\
- 0 & 32--47 & {\em Reserved} \\
- 0 & 48--63 & {\em Designated for custom use} \\
- 0 & $\ge$64 & {\em Reserved} \\
- \hline
-\end{tabular}
-\end{center}
-\caption{Machine and supervisor cause register ({\tt mcause} and
-{\tt scause}) values when the hypervisor extension is implemented.}
-\label{hcauses}
-\end{table*}
-
-\begin{commentary}
-HS-mode and VS-mode ECALLs use different cause values so they can be delegated
-separately.
-\end{commentary}
-
-When V=1, a virtual instruction exception (code 22) is normally
-raised instead of an illegal instruction exception if the attempted
-instruction is \textit{HS-qualified}
-but is prevented from executing when V=1 either due to
-insufficient privilege or because the instruction is expressly disabled
-by a supervisor or hypervisor CSR such as {\tt scounteren} or {\tt hcounteren}.
-An instruction is \textit{HS-qualified} if it would be valid to execute
-in HS-mode (for some values of the instruction's register operands),
-assuming fields TSR and TVM of CSR {\tt mstatus} are both zero.
-
-Special rules apply for CSR instructions that access \mbox{32-bit}
-high-half CSRs such as {\tt cycleh} and {\tt htimedeltah}.
-When V=1 and XLEN$>$32, an attempt to access a high-half
-supervisor-level CSR, high-half hypervisor CSR, high-half VS CSR,
-or high-half unprivileged CSR always raises an illegal instruction
-exception.
-And in VS-mode, if the XLEN for VU-mode is greater than 32, an attempt
-to access a high-half user-level CSR (distinct from an unprivileged
-CSR) always raises an illegal instruction exception.
-On the other hand, when V=1 and XLEN=32, an invalid attempt to access a
-high-half S-level, hypervisor, VS, or unprivileged CSR raises a virtual
-instruction exception instead of an illegal instruction exception
-if the same CSR instruction for the partner \textit{low-half} CSR
-(e.g.\@ {\tt cycle} or {\tt htimedelta}) is HS-qualified.
-Likewise, in VS-mode, if the XLEN for VU-mode is 32, an invalid attempt
-to access a high-half user-level CSR raises a virtual instruction
-exception instead of an illegal instruction exception if the same CSR
-instruction for the partner low-half CSR is HS-qualified.
-
-\begin{commentary}
-The RISC-V Privileged Architecture currently defines no user-level
-CSRs, but they might be added by a future version of this standard or
-by an extension.
-\end{commentary}
-
-Specifically, a virtual instruction exception is raised for the
-following cases:
-\begin{itemize}
-
-\item
-in VS-mode,
-attempts to access a non-high-half counter CSR when the corresponding bit in
-{\tt hcounteren} is~0 and the same bit in {\tt mcounteren} is~1;
-
-\item
-in VS-mode, if XLEN=32, attempts to access a high-half
-counter CSR when the corresponding bit in {\tt hcounteren} is~0 and the
-same bit in {\tt mcounteren} is~1;
-
-\item
-in VU-mode, attempts to access a non-high-half counter CSR when the
-corresponding bit in either {\tt hcounteren} or {\tt scounteren} is~0
-and the same bit in {\tt mcounteren} is~1;
-
-\item
-in VU-mode, if XLEN=32, attempts to access a high-half counter CSR when
-the corresponding bit in either {\tt hcounteren} or {\tt scounteren}
-is~0 and the same bit in {\tt mcounteren} is~1;
-
-\item
-in VS-mode or VU-mode,
-attempts to execute a hypervisor instruction (HLV, HLVX, HSV, or HFENCE);
-
-\item
-in VS-mode or VU-mode, attempts to access an implemented non-high-half
-hypervisor CSR or VS CSR when the same access (read/write) would be
-allowed in HS-mode, assuming {\tt mstatus}.TVM=0;
-
-\item
-in VS-mode or VU-mode, if XLEN=32, attempts to access an implemented
-high-half hypervisor CSR or high-half VS CSR when the same access
-(read/write) to the CSR's low-half partner would be allowed in HS-mode,
-assuming {\tt mstatus}.TVM=0;
-
-\item
-in VU-mode, attempts to execute WFI when {\tt mstatus}.TW=0, or to
-execute a supervisor instruction (SRET or SFENCE);
-
-\item
-in VU-mode, attempts to access an implemented non-high-half supervisor
-CSR when the same access (read/write) would be allowed in HS-mode,
-assuming {\tt mstatus}.TVM=0;
-
-\item
-in VU-mode, if XLEN=32, attempts to access an implemented high-half
-supervisor CSR when the same access to the CSR's low-half partner would
-be allowed in HS-mode, assuming {\tt mstatus}.TVM=0;
-
-\item
-in VS-mode, attempts to execute WFI when {\tt hstatus}.VTW=1 and
-{\tt mstatus}.TW=0, unless the instruction completes within an
-implementation-specific, bounded time;
-
-\item
-in VS-mode, attempts to execute SRET when {\tt hstatus}.VTSR=1; and
-
-\item
-in VS-mode, attempts to execute an SFENCE.VMA or SINVAL.VMA instruction or to
-access {\tt satp}, when {\tt hstatus}.VTVM=1.
-
-\end{itemize}
-Other extensions to the \mbox{RISC-V} Privileged Architecture may add
-to the set of circumstances that cause a virtual instruction exception
-when V=1.
-
-On a virtual instruction trap, {\tt mtval} or {\tt stval} is written the
-same as for an illegal instruction trap.
-
-\begin{commentary}
-It is not unusual that hypervisors must emulate the
-instructions that raise virtual instruction exceptions, to
-support nested hypervisors or for other reasons.
-Machine level is expected ordinarily to delegate virtual instruction
-traps directly to HS-level, whereas illegal instruction traps are likely
-to be processed first in M-mode before being conditionally delegated (by
-software) to HS-level.
-Consequently, virtual instruction traps are expected typically to be
-handled faster than illegal instruction traps.
-
-When not emulating the trapping instruction,
-a hypervisor should convert a virtual
-instruction trap into an illegal instruction exception for the guest
-virtual machine.
-\end{commentary}
-
-\begin{commentary}
-Because TSR and TVM in {\tt mstatus} are intended to impact only S-mode
-(HS-mode), they are ignored for determining exceptions in VS-mode.
-\end{commentary}
-
-\begin{table*}[htbp]
-\begin{center}
-\begin{tabular}{|l|r|l|}
- \hline
- Priority & Exc.\@ Code & Description \\
- \hline
- {\em Highest} & 3 & Instruction address breakpoint \\
- \hline
- & & During instruction address translation: \\
- & 12, 20, 1 & \quad First encountered page fault,
- guest-page fault, or access fault \\
- \hline
- & & With physical address for instruction: \\
- & 1 & \quad Instruction access fault \\
- \hline
- & 2 & Illegal instruction \\
- & 22 & Virtual instruction \\
- & 0 & Instruction address misaligned \\
- & 8, 9, 10, 11 & Environment call \\
- & 3 & Environment break \\
- & 3 & Load/store/AMO address breakpoint \\
- \hline
- & & Optionally: \\
- & 4, 6 & \quad Load/store/AMO address misaligned \\
- \hline
- & & During address translation for an explicit
- memory access: \\
- & 13, 15, 21, 23, 5, 7 & \quad First encountered page fault,
- guest-page fault, or access fault \\
- \hline
- & & With physical address for an explicit
- memory access: \\
- & 5, 7 & \quad Load/store/AMO access fault \\
- \hline
- & & If not higher priority: \\
- {\em Lowest} & 4, 6 & \quad Load/store/AMO address misaligned \\
- \hline
-\end{tabular}
-\end{center}
-\caption{%
-Synchronous exception priority when the hypervisor extension is
-implemented.%
-}
-\label{tab:HSyncExcPrio}
-\end{table*}
-
-If an instruction may raise multiple synchronous exceptions, the
-decreasing priority order of Table~\ref{tab:HSyncExcPrio} indicates
-which exception is taken and reported in {\tt mcause} or {\tt scause}.
-
-\FloatBarrier
-
-\subsection{Trap Entry}
-
-When a trap occurs in HS-mode or U-mode, it goes to M-mode, unless
-delegated by {\tt medeleg} or {\tt mideleg}, in which case it goes to HS-mode.
-When a trap occurs in VS-mode or VU-mode, it goes to M-mode, unless
-delegated by {\tt medeleg} or {\tt mideleg}, in which case it goes to HS-mode,
-unless further delegated by {\tt hedeleg} or {\tt hideleg}, in which case it
-goes to VS-mode.
-
-When a trap is taken into M-mode, virtualization mode V gets set to~0,
-and fields MPV and MPP in {\tt mstatus}
-(or {\tt mstatush}) are set according to
-Table~\ref{h-mpp}.
-A trap into M-mode also writes fields GVA, MPIE, and MIE in
-{\tt mstatus}/{\tt mstatush} and writes CSRs {\tt mepc}, {\tt mcause},
-{\tt mtval}, {\tt mtval2}, and {\tt mtinst}.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|l|c|c|}
- \hline
- Previous Mode & MPV & MPP \\ \hline
- U-mode & 0 & 0 \\
- HS-mode & 0 & 1 \\
- M-mode & 0 & 3 \\ \hline
- VU-mode & 1 & 0 \\
- VS-mode & 1 & 1 \\ \hline
-\end{tabular}
-\end{center}
-\caption{Value of {\tt mstatus}/{\tt mstatush} fields MPV and MPP after a trap into M-mode.
-Upon trap return, MPV is ignored when MPP=3.}
-\label{h-mpp}
-\end{table*}
-
-When a trap is taken into HS-mode, virtualization mode V is set to~0,
-and {\tt hstatus}.SPV and {\tt sstatus}.SPP are
-set according to Table~\ref{h-spp}.
-If V was 1 before the trap, field SPVP in {\tt hstatus} is set the same as
-{\tt sstatus}.SPP;
-otherwise, SPVP is left unchanged.
-A trap into HS-mode also writes field GVA in {\tt hstatus}, fields
-SPIE and SIE in {\tt sstatus}, and CSRs {\tt sepc}, {\tt scause},
-{\tt stval}, {\tt htval}, and {\tt htinst}.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|l|c|c|}
- \hline
- Previous Mode & SPV & SPP \\ \hline
- U-mode & 0 & 0 \\
- HS-mode & 0 & 1 \\ \hline
- VU-mode & 1 & 0 \\
- VS-mode & 1 & 1 \\ \hline
-\end{tabular}
-\end{center}
-\caption{Value of {\tt hstatus} field SPV and {\tt sstatus} field SPP after a trap into HS-mode.}
-\label{h-spp}
-\end{table*}
-
-When a trap is taken into VS-mode, {\tt vsstatus}.SPP is set according to
-Table~\ref{h-vspp}.
-Register {\tt hstatus} and the HS-level {\tt sstatus} are not modified,
-and the virtualization mode V remains~1.
-A trap into VS-mode also writes fields SPIE and SIE in
-{\tt vsstatus} and writes CSRs {\tt vsepc}, {\tt vscause}, and
-{\tt vstval}.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|l|c|}
- \hline
- Previous Mode & SPP \\ \hline
- VU-mode & 0 \\
- VS-mode & 1 \\ \hline
-\end{tabular}
-\end{center}
-\caption{Value of {\tt vsstatus} field SPP after a trap into VS-mode.}
-\label{h-vspp}
-\end{table*}
-
-\FloatBarrier
-
-\subsection{Transformed Instruction or Pseudoinstruction for {\tt mtinst} or {\tt htinst}}
-\label{sec:tinst-vals}
-
-On any trap into M-mode or HS-mode, one of these values is written
-automatically into the appropriate trap instruction CSR, {\tt mtinst} or
-{\tt htinst}:
-\begin{tightlist}
-\item
-zero;
-\item
-a transformation of the trapping instruction;
-\item
-a custom value (allowed only if the trapping instruction is non-standard);
-or
-\item
-a special pseudoinstruction.
-\end{tightlist}
-Except when a pseudoinstruction value is required (described later), the
-value written to {\tt mtinst} or {\tt htinst} may always be zero,
-indicating that the hardware is providing no information in the register
-for this particular trap.
-
-\begin{commentary}
-The value written to the trap instruction CSR serves two purposes.
-The first is to improve the speed of instruction emulation in a trap
-handler, partly by allowing the handler to skip loading the trapping
-instruction from memory, and partly by obviating some of the work of
-decoding and executing the instruction.
-The second purpose is to supply, via pseudoinstructions, additional
-information about guest-page-fault exceptions caused by implicit memory
-accesses done for VS-stage address translation.
-
-A \emph{transformation} of the trapping instruction is written instead of
-simply a copy of the original instruction in order to minimize the burden
-for hardware yet still provide to a trap handler the information needed
-to emulate the instruction.
-An implementation may at any time reduce its effort by substituting zero
-in place of the transformed instruction.
-\end{commentary}
-
-On an interrupt, the value written to the trap instruction register is
-always zero.
-On a synchronous exception, if a nonzero value is written, one of the
-following shall be true about the value:
-
-\begin{itemize}
-
-\item
-Bit~0 is {\tt 1}, and replacing bit~1 with {\tt 1} makes the value into a
-valid encoding of a standard instruction.
-
-In this case, the instruction that trapped is the same kind as indicated
-by the register value, and the register value is the transformation of
-the trapping instruction, as defined later.
-For example, if bits 1:0 are binary {\tt 11} and the register value is
-the encoding of a standard LW (load word) instruction, then the trapping
-instruction is LW, and the register value is the transformation of the
-trapping LW instruction.
-
-\item
-Bit~0 is {\tt 1}, and replacing bit~1 with {\tt 1} makes the value into
-an instruction encoding that is explicitly designated for a custom
-instruction (\emph{not} an unused reserved encoding).
-
-This is a \textit{custom value}.
-The instruction that trapped is a non-standard instruction.
-The interpretation of a custom value is not otherwise specified by this
-standard.
-
-\item
-The value is one of the special pseudoinstructions defined later, all of
-which have bits 1:0 equal to {\tt 00}.
-
-\end{itemize}
-
-These three cases exclude a large number of other possible values, such
-as all those having bits 1:0 equal to binary {\tt 10}.
-A future standard or extension may define additional cases, thus allowing
-values that are currently excluded.
-Software may safely treat an unrecognized value in a trap instruction
-register the same as zero.
-
-\begin{commentary}
-To be forward-compatible with future revisions of this standard, software
-that interprets a nonzero value from {\tt mtinst} or {\tt htinst} must
-fully verify that the value conforms to one of the cases listed above.
-For instance, for RV64, discovering that bits 6:0 of {\tt mtinst} are
-{\tt 0000011} and bits 14:12 are {\tt 010} is not sufficient to establish
-that the first case applies and the trapping instruction is a standard LW
-instruction;
-rather, software must also confirm that bits 63:32 of {\tt mtinst} are
-all zeros.
-A future standard might define new values for 64-bit {\tt mtinst} that
-are nonzero in bits 63:32 yet may coincidentally have in bits 31:0 the
-same bit patterns as standard RV64 instructions.
-\end{commentary}
-
-\begin{commentary}
-Unlike for standard instructions, there is no requirement that the
-instruction encoding of a custom value be of the same ``kind'' as the
-instruction that trapped (or even have any correlation with the trapping
-instruction).
-\end{commentary}
-
-Table~\ref{tab:tinst-values} shows the values that may be automatically
-written to the trap instruction register for each standard exception
-cause.
-For exceptions that prevent the fetching of an instruction, only zero or
-a pseudoinstruction value may be written.
-A custom value may be automatically written only if the instruction that
-traps is non-standard.
-A future standard or extension may permit other values to be written,
-chosen from the set of allowed values established earlier.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|l|c|c|c|c|}
-\hline
- & & Transformed & & Pseudo- \\
- & & Standard & Custom & instruction \\
-Exception & Zero & Instruction & Value & Value \\
-\hline
-Instruction address misaligned & Yes & No & Yes & No \\
-\hline
-Instruction access fault & Yes & No & No & No \\
-Illegal instruction & Yes & No & No & No \\
-Breakpoint & Yes & No & Yes & No \\
-Virtual instruction & Yes & No & Yes & No \\
-\hline
-Load address misaligned & Yes & Yes & Yes & No \\
-Load access fault & Yes & Yes & Yes & No \\
-Store/AMO address misaligned & Yes & Yes & Yes & No \\
-Store/AMO access fault & Yes & Yes & Yes & No \\
-\hline
-Environment call & Yes & No & Yes & No \\
-\hline
-Instruction page fault & Yes & No & No & No \\
-Load page fault & Yes & Yes & Yes & No \\
-Store/AMO page fault & Yes & Yes & Yes & No \\
-\hline
-Instruction guest-page fault & Yes & No & No & Yes \\
-Load guest-page fault & Yes & Yes & Yes & Yes \\
-Store/AMO guest-page fault & Yes & Yes & Yes & Yes \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Values that may be automatically written to the trap instruction
-register ({\tt mtinst} or {\tt htinst}) on an exception trap.}
-\label{tab:tinst-values}
-\end{table*}
-
-As enumerated in the table, a synchronous exception may write to the trap
-instruction register a standard transformation of the trapping
-instruction only for exceptions that arise from explicit memory accesses
-(from loads, stores, and AMO instructions).
-Accordingly, standard transformations are currently defined only for
-these memory-access instructions.
-If a synchronous trap occurs for a standard instruction for which no
-transformation has been defined, the trap instruction register shall be
-written with zero (or, under certain circumstances, with a special
-pseudoinstruction value).
-
-\FloatBarrier
-
-For a standard load instruction that is not a compressed instruction and
-is one of LB, LBU, LH, LHU, LW, LWU, LD, FLW, FLD, FLQ, or FLH, the
-transformed instruction has the format shown in
-Figure~\ref{transformedloadinst}.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}O@{}R@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{25} &
-\instbitrange{24}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{0} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{Addr.\ Offset} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-7 & 5 & 5 & 3 & 5 & 7 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Transformed noncompressed load instruction (LB, LBU, LH, LHU,
-LW, LWU, LD, FLW, FLD, FLQ, or FLH).
-Fields funct3, rd, and opcode are the same as the trapping load
-instruction.}
-\label{transformedloadinst}
-\end{figure*}
-
-For a standard store instruction that is not a compressed instruction and
-is one of SB, SH, SW, SD, FSW, FSD, FSQ, or FSH, the transformed instruction
-has the format shown in Figure~\ref{transformedstoreinst}.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}O@{}R@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{25} &
-\instbitrange{24}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{0} &
-\multicolumn{1}{c|}{rs2} &
-\multicolumn{1}{c|}{Addr.\ Offset} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-7 & 5 & 5 & 3 & 5 & 7 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Transformed noncompressed store instruction (SB, SH, SW, SD,
-FSW, FSD, FSQ, or FSH).
-Fields rs2, funct3, and opcode are the same as the trapping store
-instruction.}
-\label{transformedstoreinst}
-\end{figure*}
-
-For a standard atomic instruction (load-reserved, store-conditional, or
-AMO instruction), the transformed instruction has the format shown in
-Figure~\ref{transformedatomicinst}.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}R@{}c@{}c@{}R@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{27} &
-\instbit{26} &
-\instbit{25} &
-\instbitrange{24}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{funct5} &
-\multicolumn{1}{c|}{aq} &
-\multicolumn{1}{c|}{rl} &
-\multicolumn{1}{c|}{rs2} &
-\multicolumn{1}{c|}{Addr.\ Offset} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-5 & 1 & 1 & 5 & 5 & 3 & 5 & 7 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Transformed atomic instruction (load-reserved,
-store-conditional, or AMO instruction).
-All fields are the same as the trapping instruction except bits 19:15,
-Addr.\ Offset.}
-\label{transformedatomicinst}
-\end{figure*}
-
-For a standard virtual-machine load/store instruction
-(HLV, HLVX, or HSV), the transformed instruction has the format shown in
-Figure~\ref{transformedvmaccessinst}.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}O@{}R@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{25} &
-\instbitrange{24}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{funct7} &
-\multicolumn{1}{c|}{rs2} &
-\multicolumn{1}{c|}{Addr.\ Offset} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-7 & 5 & 5 & 3 & 5 & 7 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Transformed virtual-machine load/store instruction (HLV, HLVX, or HSV).
-All fields are the same as the trapping instruction except bits 19:15,
-Addr.\ Offset.}
-\label{transformedvmaccessinst}
-\end{figure*}
-
-In all the transformed instructions above, the Addr.\ Offset field that
-replaces the instruction's rs1 field in bits 19:15 is the positive
-difference between the faulting virtual address (written to {\tt mtval}
-or {\tt stval}) and the original virtual address.
-This difference can be nonzero only for a misaligned memory access.
-Note also that, for basic loads and stores, the transformations replace
-the instruction's immediate offset fields with zero.
-
-For a standard compressed instruction (16-bit size), the transformed
-instruction is found as follows:
-\begin{enumerate}
-\item
-Expand the compressed instruction to its 32-bit equivalent.
-\item
-Transform the 32-bit equivalent instruction.
-\item
-Replace bit~1 with a~{\tt 0}.
-\end{enumerate}
-Bits 1:0 of a transformed standard instruction will be binary {\tt 01} if
-the trapping instruction is compressed and {\tt 11} if not.
-
-\begin{commentary}
-In decoding the contents of {\tt mtinst} or {\tt htinst}, once software
-has determined that the register contains the encoding of a standard
-basic load (LB, LBU, LH, LHU, LW, LWU, LD, FLW, FLD, FLQ, or FLH) or basic
-store (SB, SH, SW, SD, FSW, FSD, FSQ, or FSH), it is not necessary to confirm
-also that the immediate offset fields (31:25, and 24:20 or 11:7) are
-zeros.
-The knowledge that the register's value is the encoding of a basic
-load/store is sufficient to prove that the trapping instruction is of the
-same kind.
-
-A future version of this standard may add information to the fields that
-are currently zeros.
-However, for backwards compatibility, any such information will be for
-performance purposes only and can safely be ignored.
-\end{commentary}
-
-For guest-page faults, the trap instruction register is written with a
-special pseudoinstruction value if:
-(a)~the fault is caused by an implicit memory access for VS-stage address
-translation, and
-(b)~a nonzero value (the faulting guest physical address) is written to
-{\tt mtval2} or {\tt htval}.
-If both conditions are met, the value written to {\tt mtinst} or
-{\tt htinst} must be taken from Table~\ref{tab:pseudoinsts};
-zero is not allowed.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|l|l|}
-\hline
-Value & Meaning \\
-\hline
-{\tt 0x00002000} & 32-bit read for VS-stage address translation (RV32) \\
-{\tt 0x00002020} & 32-bit write for VS-stage address translation (RV32) \\
-\hline
-{\tt 0x00003000} & 64-bit read for VS-stage address translation (RV64) \\
-{\tt 0x00003020} & 64-bit write for VS-stage address translation (RV64) \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Special pseudoinstruction values for guest-page faults.
-The RV32 values are used when VSXLEN=32, and the RV64 values when VSXLEN=64.}
-\label{tab:pseudoinsts}
-\end{table*}
-
-The defined pseudoinstruction values are designed to correspond closely
-with the encodings of basic loads and stores, as illustrated by
-Table~\ref{tab:pseudoinsts-basis}.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|l|l|}
-\hline
-Encoding & Instruction \\ \hline
-{\tt 0x00002003} & {\tt lw x0,0(x0)} \\
-{\tt 0x00002023} & {\tt sw x0,0(x0)} \\ \hline
-{\tt 0x00003003} & {\tt ld x0,0(x0)} \\
-{\tt 0x00003023} & {\tt sd x0,0(x0)} \\ \hline
-\end{tabular}
-\end{center}
-\caption{Standard instructions corresponding to the special
-pseudoinstructions of Table~\ref{tab:pseudoinsts}.}
-\label{tab:pseudoinsts-basis}
-\end{table*}
-
-A \textit{write} pseudoinstruction ({\tt 0x00002020} or {\tt 0x00003020})
-is used for the case that the machine is attempting automatically to
-update bits A and/or D in VS-level page tables.
-All other implicit memory accesses for VS-stage address translation will
-be reads.
-If a machine never automatically updates bits A or D in VS-level page
-tables (leaving this to software), the \textit{write} case will never
-arise.
-The fact that such a page table update must actually be atomic, not just
-a simple write, is ignored for the pseudoinstruction.
-
-\begin{commentary}
-If the conditions that necessitate a pseudoinstruction value can ever
-occur for M-mode, then {\tt mtinst} cannot be entirely read-only zero;
-and likewise for HS-mode and {\tt htinst}.
-However, in that case, the trap instruction registers may minimally
-support only values 0 and {\tt 0x00002000} or {\tt 0x00003000}, and
-possibly {\tt 0x00002020} or {\tt 0x00003020}, requiring as few as one or
-two flip-flops in hardware, per register.
-\end{commentary}
-
-\begin{commentary}
-There is no harm here in ignoring the atomicity requirement for page
-table updates, because a hypervisor is not expected in these
-circumstances to emulate an implicit memory access that fails.
-Rather, the hypervisor is given enough information about the faulting
-access to be able to make the memory accessible (e.g.\ by restoring a
-missing page of virtual memory) before resuming execution by retrying the
-faulting instruction.
-\end{commentary}
-
-\subsection{Trap Return}
-
-The MRET instruction is used to return from a trap taken into M-mode.
-MRET first determines what the new privilege mode will be according to
-the values of MPP and MPV in {\tt mstatus} or {\tt mstatush}, as encoded in
-Table~\ref{h-mpp}.
-MRET then in {\tt mstatus}/{\tt mstatush} sets MPV=0, MPP=0, MIE=MPIE, and MPIE=1.
-Lastly, MRET sets the privilege mode as previously
-determined, and sets {\tt pc}={\tt mepc}.
-
-The SRET instruction is used to return from a trap taken into HS-mode or
-VS-mode. Its behavior depends on the current virtualization mode.
-
-When executed in M-mode or HS-mode (i.e., V=0), SRET first determines
-what the new privilege mode will be according to the values in
-{\tt hstatus}.SPV and {\tt sstatus}.SPP, as encoded in Table~\ref{h-spp}.
-SRET then sets {\tt hstatus}.SPV=0, and in {\tt sstatus} sets SPP=0,
-SIE=SPIE, and SPIE=1.
-Lastly, SRET sets the privilege mode as previously
-determined, and sets {\tt pc}={\tt sepc}.
-
-When executed in VS-mode (i.e., V=1), SRET sets the privilege mode according to
-Table~\ref{h-vspp}, in {\tt vsstatus} sets SPP=0, SIE=SPIE, and SPIE=1, and
-lastly sets {\tt pc}={\tt vsepc}.
diff --git a/src/latex/machine.tex b/src/latex/machine.tex
deleted file mode 100644
index 53c997a..0000000
--- a/src/latex/machine.tex
+++ /dev/null
@@ -1,3823 +0,0 @@
-\chapter{Machine-Level ISA, Version 1.12}
-\label{machine}
-
-This chapter describes the machine-level operations available in
-machine-mode (M-mode), which is the highest privilege mode in a RISC-V
-system. M-mode is used for low-level access to a
-hardware platform and is the first mode entered at reset. M-mode can
-also be used to implement features that are too difficult or expensive
-to implement in hardware directly. The RISC-V machine-level ISA
-contains a common core that is extended depending on which other
-privilege levels are supported and other details of the hardware
-implementation.
-
-\section{Machine-Level CSRs}
-
-In addition to the machine-level CSRs described in this section,
-M-mode code can access all CSRs at lower privilege levels.
-
-\subsection{Machine ISA Register {\tt misa}}
-\label{sec:misa}
-
-The {\tt misa} CSR is a \warl\ read-write register
-reporting the ISA supported by the hart. This register must be
-readable in any implementation, but a value of zero can be returned to
-indicate the {\tt misa} register has not been implemented, requiring
-that CPU capabilities be determined through a separate non-standard
-mechanism.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{c@{}c@{}L}
-\instbitrange{MXLEN-1}{MXLEN-2} &
-\instbitrange{MXLEN-3}{26} &
-\instbitrange{25}{0} \\
-\hline
-\multicolumn{1}{|c|}{MXL[1:0] (\warl)} &
-\multicolumn{1}{c|}{0 (\warl)} &
-\multicolumn{1}{c|}{Extensions[25:0] (\warl)} \\
-\hline
-2 & MXLEN-28 & 26 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine ISA register ({\tt misa}).}
-\label{misareg}
-\end{figure*}
-
-The MXL (Machine XLEN) field encodes the native base integer ISA width
-as shown in Table~\ref{misabase}. The MXL field may be writable in
-implementations that support multiple base ISAs. The effective
-XLEN in M-mode, {\em MXLEN}, is given by the setting of MXL, or has a
-fixed value if {\tt misa} is zero. The MXL field is always set to the
-widest supported ISA variant at reset.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|r|r|}
-\hline
-MXL & XLEN \\
-\hline
-1 & 32 \\
-2 & 64 \\
-3 & 128 \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Encoding of MXL field in {\tt misa}.}
-\label{misabase}
-\end{table*}
-
-The {\tt misa} CSR is MXLEN bits wide. If the value read from {\tt misa} is
-nonzero, field MXL of that value always denotes the current MXLEN. If a write
-to {\tt misa} causes MXLEN to change, the position of MXL moves to the
-most-significant two bits of {\tt misa} at the new width.
-
-\begin{commentary}
-The base width can be quickly ascertained using branches on the sign
-of the returned {\tt misa} value, and possibly a shift left by one and
-a second branch on the sign. These checks can be written in assembly
-code without knowing the register width (XLEN) of the machine. The
-base width is given by $\mbox{XLEN}=2^{\mbox{MXL+4}}$.
-
-The base width can also be found if {\tt misa} is zero, by placing the
-immediate 4 in a register then shifting the register left by 31 bits
-at a time. If zero after one shift, then the machine is RV32. If
-zero after two shifts, then the machine is RV64, else RV128.
-\end{commentary}
-
-The Extensions field encodes the presence of the standard extensions,
-with a single bit per letter of the alphabet (bit 0 encodes presence
-of extension ``A'', bit 1 encodes presence of extension ``B'',
-through to bit 25 which encodes ``Z''). The ``I'' bit will be set for
-RV32I, RV64I, RV128I base ISAs, and the ``E'' bit will be set for
-RV32E. The Extensions field is a \warl\ field that can contain writable bits
-where the implementation allows the supported ISA to be modified. At
-reset, the Extensions field shall contain the maximal set of supported
-extensions, and I shall be selected over E if both are available.
-
-When a standard extension is disabled by clearing its bit in {\tt misa}, the instructions and CSRs defined or modified by the extension revert to their defined or reserved behaviors as if the extension were not implemented.
-
-\begin{commentary}
-For a given \mbox{RISC-V} execution environment, an instruction,
-extension, or other feature of the \mbox{RISC-V} ISA
-is ordinarily judged to be \emph{implemented} or not
-by the observable execution behavior in that environment.
-For example, the F extension is said to be implemented for
-an execution environment if and only if the instructions that the
-\mbox{RISC-V} Unprivileged ISA defines for F execute as specified.
-
-With this definition of \emph{implemented}, disabling an
-extension by clearing its bit in {\tt misa} results in the
-extension being considered \emph{not implemented} in M-mode.
-For example, setting {\tt misa}.F=0 results in the F extension
-being not implemented for M-mode, because the F extension's
-instructions will not act as the Unprivileged ISA requires
-but may instead raise an illegal instruction exception.
-
-Defining the term \emph{implemented}
-based strictly on the observable behavior might conflict
-with other common understandings of the same word.
-In particular, although common usage may allow
-for the combination ``implemented but disabled,''
-in this document it is considered a contradiction in terms,
-because \emph{disabled} implies execution will not behave
-as required for the feature to be considered \emph{implemented}.
-In the same vein, ``implemented and enabled'' is redundant here;
-``implemented'' suffices.
-\end{commentary}
-
-The design of the RV128I base ISA is not yet complete, and while much of the remainder of this
-specification is expected to apply to RV128, this version of the document
-focuses only on RV32 and RV64.
-
-The ``U'' and ``S'' bits will be set if there is support for user and
-supervisor modes respectively.
-
-The ``X'' bit will be set if there are any non-standard extensions.
-
-\begin{table*}
-\begin{center}
-\begin{tabular}{|r|r|l|}
-\hline
-Bit & Character & Description \\
-\hline
- 0 & A & Atomic extension \\
- 1 & B & {\em Reserved} \\
- 2 & C & Compressed extension \\
- 3 & D & Double-precision floating-point extension \\
- 4 & E & RV32E base ISA \\
- 5 & F & Single-precision floating-point extension \\
- 6 & G & {\em Reserved} \\
- 7 & H & Hypervisor extension \\
- 8 & I & RV32I/64I/128I base ISA \\
- 9 & J & {\em Reserved} \\
- 10 & K & {\em Reserved} \\
- 11 & L & {\em Reserved} \\
- 12 & M & Integer Multiply/Divide extension \\
- 13 & N & {\em Tentatively reserved for User-Level Interrupts extension} \\
- 14 & O & {\em Reserved} \\
- 15 & P & {\em Tentatively reserved for Packed-SIMD extension} \\
- 16 & Q & Quad-precision floating-point extension \\
- 17 & R & {\em Reserved} \\
- 18 & S & Supervisor mode implemented \\
- 19 & T & {\em Reserved} \\
- 20 & U & User mode implemented \\
- 21 & V & ``V'' Vector extension implemented \\
- 22 & W & {\em Reserved} \\
- 23 & X & Non-standard extensions present \\
- 24 & Y & {\em Reserved} \\
- 25 & Z & {\em Reserved} \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Encoding of Extensions field in {\tt misa}. All bits that are
- reserved for future use must return zero when read.}
-\label{misaletters}
-\end{table*}
-
-
-\begin{commentary}
-The {\tt misa} CSR exposes a rudimentary catalog of CPU features
-to machine-mode code. More extensive information can be obtained in
-machine mode by probing other machine registers, and examining other
-ROM storage in the system as part of the boot process.
-
-We require that lower privilege levels execute environment calls
-instead of reading CPU registers to determine features available at
-each privilege level. This enables virtualization layers to alter the
-ISA observed at any level, and supports a much richer command
-interface without burdening hardware designs.
-\end{commentary}
-
-The ``E'' bit is read-only. Unless {\tt misa} is all read-only zero, the ``E''
-bit always reads as the complement of the ``I'' bit.
-If an execution environment supports both RV32E and RV32I,
-software can select RV32E by clearing the ``I'' bit.
-
-If an ISA feature {\em x} depends on an ISA feature {\em y}, then attempting
-to enable feature {\em x} but disable feature {\em y} results in both features
-being disabled. For example, setting ``F''=0 and ``D''=1 results in both
-``F'' and ``D'' being cleared.
-
-An implementation may impose additional constraints on the collective setting
-of two or more {\tt misa} fields, in which case they function collectively as
-a single \warl\ field. An attempt to write an unsupported combination causes
-those bits to be set to some supported combination.
-
-Writing {\tt misa} may increase IALIGN, e.g., by disabling the ``C''
-extension.
-If an instruction that would write {\tt misa} increases IALIGN, and
-the subsequent instruction's address is not IALIGN-bit aligned, the
-write to {\tt misa} is suppressed, leaving {\tt misa} unchanged.
-
-When software enables an extension that was previously disabled, then all
-state uniquely associated with that extension is \unspecified, unless
-otherwise specified by that extension.
-
-\subsection{Machine Vendor ID Register {\tt mvendorid}}
-
-The {\tt mvendorid} CSR is a 32-bit read-only register providing
-the JEDEC manufacturer ID of the provider of the core. This register
-must be readable in any implementation, but a value of 0 can be
-returned to indicate the field is not implemented or that this is a
-non-commercial implementation.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{JS}
-\instbitrange{31}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{Bank} &
-\multicolumn{1}{c|}{Offset} \\
-\hline
-25 & 7 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Vendor ID register ({\tt mvendorid}).}
-\label{mvendorreg}
-\end{figure*}
-
-JEDEC manufacturer IDs are ordinarily encoded as a sequence of one-byte
-continuation codes {\tt 0x7f}, terminated by a one-byte ID not equal to
-{\tt 0x7f}, with an odd parity bit in the most-significant bit of each byte.
-{\tt mvendorid} encodes the number of one-byte continuation
-codes in the Bank field, and encodes the final byte in the Offset field,
-discarding the parity bit. For example, the JEDEC manufacturer ID
-{\tt 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x8a}
-(twelve continuation codes followed by {\tt 0x8a}) would be encoded in the
-{\tt mvendorid} CSR as {\tt 0x60a}.
-
-\begin{commentary}
-In JEDEC's parlance, the bank number is one greater than the number of
-continuation codes; hence, the {\tt mvendorid} Bank field encodes a value
-that is one less than the JEDEC bank number.
-\end{commentary}
-
-\begin{commentary}
-Previously the vendor ID was to be a number allocated by RISC-V
-International, but this duplicates the work of JEDEC in maintaining a
-manufacturer ID standard. At time of writing, registering a
-manufacturer ID with JEDEC has a one-time cost of \$500.
-\end{commentary}
-
-\subsection{Machine Architecture ID Register {\tt marchid}}
-
-The {\tt marchid} CSR is an MXLEN-bit read-only register encoding the
-base microarchitecture of the hart. This register must be readable in
-any implementation, but a value of 0 can be returned to indicate the
-field is not implemented. The combination of {\tt mvendorid} and {\tt
- marchid} should uniquely identify the type of hart microarchitecture
-that is implemented.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{J}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Architecture ID} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine Architecture ID register ({\tt marchid}).}
-\label{marchreg}
-\end{figure*}
-
-Open-source project architecture IDs are allocated globally by
-RISC-V International, and have non-zero architecture IDs with a zero
-most-significant-bit (MSB). Commercial architecture IDs are allocated
-by each commercial vendor independently, but must have the MSB set and
-cannot contain zero in the remaining MXLEN-1 bits.
-
-\begin{commentary}
-The intent is for the architecture ID to represent the
-microarchitecture associated with the repo around which development
-occurs rather than a particular organization. Commercial fabrications
-of open-source designs should (and might be required by the license
-to) retain the original architecture ID. This will aid in reducing
-fragmentation and tool support costs, as well as provide attribution.
-Open-source architecture IDs are administered by RISC-V International
-and should only be allocated to released, functioning open-source
-projects. Commercial architecture IDs can be managed independently by
-any registered vendor but are required to have IDs disjoint from the
-open-source architecture IDs (MSB set) to prevent collisions if a
-vendor wishes to use both closed-source and open-source
-microarchitectures.
-
-The convention adopted within the following Implementation field can
-be used to segregate branches of the same architecture design,
-including by organization. The {\tt misa} register also helps
-distinguish different variants of a design.
-\end{commentary}
-
-\subsection{Machine Implementation ID Register {\tt mimpid}}
-
-The {\tt mimpid} CSR provides a unique encoding of the version of the
-processor implementation. This register must be readable in any
-implementation, but a value of 0 can be returned to indicate that the
-field is not implemented. The Implementation value should reflect the
-design of the RISC-V processor itself and not any surrounding system.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{J}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Implementation} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine Implementation ID register ({\tt mimpid}).}
-\label{mimpidreg}
-\end{figure*}
-
-\begin{commentary}
-The format of this field is left to the provider of the architecture
-source code, but will often be printed by standard tools as a
-hexadecimal string without any leading or trailing zeros, so the
-Implementation value can be left-justified (i.e., filled in from
-most-significant nibble down) with subfields aligned on nibble
-boundaries to ease human readability.
-\end{commentary}
-
-\subsection{Hart ID Register {\tt mhartid}}
-
-The {\tt mhartid} CSR is an MXLEN-bit read-only register
-containing the integer ID of the hardware thread running the code.
-This register must be readable in any implementation. Hart IDs might
-not necessarily be numbered contiguously in a multiprocessor system,
-but at least one hart must have a hart ID of zero. Hart IDs must be
-unique within the execution environment.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{J}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Hart ID}\\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hart ID register ({\tt mhartid}).}
-\label{mhartidreg}
-\end{figure*}
-
-
-\begin{commentary}
-In certain cases, we must ensure exactly one hart runs some code
-(e.g., at reset), and so require one hart to have a known hart ID of
-zero.
-
-For efficiency, system implementers should aim to reduce the magnitude
-of the largest hart ID used in a system.
-\end{commentary}
-
-\subsection{Machine Status Registers ({\tt mstatus} and {\tt mstatush})}
-
-The {\tt mstatus} register is an MXLEN-bit read/write register
-formatted as shown in Figure~\ref{mstatusreg-rv32} for RV32 and
-Figure~\ref{mstatusreg} for RV64. The {\tt mstatus}
-register keeps track of and controls the hart's current operating
-state. A restricted view of {\tt mstatus} appears as the
-{\tt sstatus} register in the S-level ISA.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{cKccccccc}
-\\
-\instbit{31} &
-\instbitrange{30}{23} &
-\instbit{22} &
-\instbit{21} &
-\instbit{20} &
-\instbit{19} &
-\instbit{18} &
-\instbit{17} &
- \\
-\hline
-\multicolumn{1}{|c|}{SD} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{TSR} &
-\multicolumn{1}{c|}{TW} &
-\multicolumn{1}{c|}{TVM} &
-\multicolumn{1}{c|}{MXR} &
-\multicolumn{1}{c|}{SUM} &
-\multicolumn{1}{c|}{MPRV} &
- \\
-\hline
-1 & 8 & 1 & 1 & 1 & 1 & 1 & 1 & \\
-\end{tabular}
-\begin{tabular}{cWWcWccccccccc}
-\\
-&
-\instbitrange{16}{15} &
-\instbitrange{14}{13} &
-\instbitrange{12}{11} &
-\instbitrange{10}{9} &
-\instbit{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
- &
-\multicolumn{1}{|c|}{XS[1:0]} &
-\multicolumn{1}{c|}{FS[1:0]} &
-\multicolumn{1}{c|}{MPP[1:0]} &
-\multicolumn{1}{c|}{VS[1:0]} &
-\multicolumn{1}{c|}{SPP} &
-\multicolumn{1}{c|}{MPIE} &
-\multicolumn{1}{c|}{UBE} &
-\multicolumn{1}{c|}{SPIE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{MIE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{SIE} &
-\multicolumn{1}{c|}{\wpri} \\
-\hline
- & 2 & 2 & 2 & 2 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine-mode status register ({\tt mstatus}) for RV32.}
-\label{mstatusreg-rv32}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\scalebox{0.95}{
-\begin{tabular}{cRccccYcccccc}
-\\
-\instbit{63} &
-\instbitrange{62}{38} &
-\instbit{37} &
-\instbit{36} &
-\instbitrange{35}{34} &
-\instbitrange{33}{32} &
-\instbitrange{31}{23} &
-\instbit{22} &
-\instbit{21} &
-\instbit{20} &
-\instbit{19} &
-\instbit{18} &
- \\
-\hline
-\multicolumn{1}{|c|}{SD} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{MBE} &
-\multicolumn{1}{c|}{SBE} &
-\multicolumn{1}{c|}{SXL[1:0]} &
-\multicolumn{1}{c|}{UXL[1:0]} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{TSR} &
-\multicolumn{1}{c|}{TW} &
-\multicolumn{1}{c|}{TVM} &
-\multicolumn{1}{c|}{MXR} &
-\multicolumn{1}{c|}{SUM} &
- \\
-\hline
-1 & 25 & 1 & 1 & 2 & 2 & 9 & 1 & 1 & 1 & 1 & 1 & \\
-\end{tabular}}
-\scalebox{0.95}{
-\begin{tabular}{ccWWcWccccccccc}
-\\
-&
-\instbit{17} &
-\instbitrange{16}{15} &
-\instbitrange{14}{13} &
-\instbitrange{12}{11} &
-\instbitrange{10}{9} &
-\instbit{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
- &
-\multicolumn{1}{|c|}{MPRV} &
-\multicolumn{1}{c|}{XS[1:0]} &
-\multicolumn{1}{c|}{FS[1:0]} &
-\multicolumn{1}{c|}{MPP[1:0]} &
-\multicolumn{1}{c|}{VS[1:0]} &
-\multicolumn{1}{c|}{SPP} &
-\multicolumn{1}{c|}{MPIE} &
-\multicolumn{1}{c|}{UBE} &
-\multicolumn{1}{c|}{SPIE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{MIE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{SIE} &
-\multicolumn{1}{c|}{\wpri} \\
-\hline
- & 1 & 2 & 2 & 2 & 2 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\
-\end{tabular}}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine-mode status register ({\tt mstatus}) for RV64.}
-\label{mstatusreg}
-\end{figure*}
-
-For RV32 only, {\tt mstatush} is a 32-bit read/write register formatted
-as shown in Figure~\ref{mstatushreg}.
-Bits 30:4 of {\tt mstatush} generally contain the same fields found in
-bits 62:36 of {\tt mstatus} for RV64.
-Fields SD, SXL, and UXL do not exist in {\tt mstatush}.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{JccF}
-\\
-\instbitrange{31}{6} &
-\instbit{5} &
-\instbit{4} &
-\instbitrange{3}{0} \\
-\hline
-\multicolumn{1}{|c|}{\wpri} &
-\multicolumn{1}{c|}{MBE} &
-\multicolumn{1}{c|}{SBE} &
-\multicolumn{1}{c|}{\wpri} \\
-\hline
-26 & 1 & 1 & 4 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Additional machine-mode status register ({\tt mstatush}) for RV32.}
-\label{mstatushreg}
-\end{figure*}
-
-\subsubsection{Privilege and Global Interrupt-Enable Stack in {\tt mstatus} register}
-\label{privstack}
-
-Global interrupt-enable bits, MIE and SIE, are provided for M-mode and
-S-mode respectively.
-These bits are primarily used to guarantee atomicity
-with respect to interrupt handlers in the current privilege mode.
-
-\begin{commentary}
-The global {\em x}\/IE bits are located in the low-order bits of {\tt mstatus},
-allowing them to be atomically set or cleared with a single CSR
-instruction.
-\end{commentary}
-
-When a hart is executing in privilege mode {\em x}, interrupts are
-globally enabled when {\em x}\/IE=1 and globally disabled when {\em
- x}\/IE=0. Interrupts for lower-privilege modes, {\em w}$<${\em x},
-are always globally disabled regardless of the setting of any global
-{\em w}\/IE bit for the lower-privilege mode.
-Interrupts for
-higher-privilege modes, {\em y}$>${\em x}, are always globally enabled
-regardless of the setting of the global {\em y}\/IE bit for the
-higher-privilege mode.
-Higher-privilege-level code can use separate
-per-interrupt enable bits to disable selected higher-privilege-mode
-interrupts before ceding control to a lower-privilege mode.
-
-\begin{commentary}
- A higher-privilege mode {\em y} could disable all of its interrupts
- before ceding control to a lower-privilege mode but this would be
- unusual as it would leave only a synchronous trap, non-maskable
- interrupt, or reset as means to regain control of the hart.
-\end{commentary}
-
-To support nested traps, each privilege mode {\em x} that can respond to
-interrupts has a two-level
-stack of interrupt-enable bits and privilege modes. {\em x}\/PIE
-holds the value of the interrupt-enable bit active prior to the trap,
-and {\em x}\/PP holds the previous privilege mode. The {\em x}\/PP
-fields can only hold privilege modes up to {\em x}, so MPP is
-two bits wide and SPP is one bit wide. When
-a trap is taken from privilege mode {\em y} into privilege mode {\em
- x}, {\em x}\/PIE is set to the value of {\em x}\/IE; {\em x}\/IE is set to
-0; and {\em x}\/PP is set to {\em y}.
-
-\begin{commentary}
-For lower privilege modes, any trap (synchronous or asynchronous) is
-usually taken at a higher privilege mode with interrupts disabled upon entry.
-The higher-level trap handler will either service the trap and return
-using the stacked information, or, if not returning immediately to the
-interrupted context, will save the privilege stack before re-enabling
-interrupts, so only one entry per stack is required.
-\end{commentary}
-
-An MRET or SRET instruction is used to return from
-a trap in M-mode or S-mode respectively. When
-executing an {\em x}\/RET instruction, supposing {\em x}\/PP holds the
-value {\em y}, {\em x}\/IE is set to {\em x}\/PIE; the privilege mode
-is changed to {\em y}; {\em x}\/PIE is set to 1; and {\em x}\/PP is
-set to the least-privileged supported mode (U if U-mode is implemented, else M).
-If {\em y}$\neq$M, {\em x}\/RET also sets MPRV=0.
-
-\begin{commentary}
-Setting {\em x}\/PP to the least-privileged supported mode on an {\em x}\/RET
-helps identify software bugs in the management of the two-level privilege-mode
-stack.
-\end{commentary}
-
-{\em x}\/PP fields are \warl\ fields that can hold only privilege mode {\em x}
-and any implemented privilege mode lower than {\em x}. If privilege mode {\em
-x} is not implemented, then {\em x}\/PP must be read-only~0.
-
-\begin{commentary}
-M-mode software can determine whether a privilege mode is implemented
-by writing that mode to MPP then reading it back.
-
-If the machine provides only U and M modes, then only a single
-hardware storage bit is required to represent either 00 or 11 in MPP.
-\end{commentary}
-
-\subsubsection{Base ISA Control in {\tt mstatus} Register}
-\label{xlen-control}
-
-For RV64 systems, the SXL and UXL fields are \warl\ fields
-that control the value of XLEN for S-mode and U-mode,
-respectively. The encoding of these fields is the same as the MXL
-field of {\tt misa}, shown in Table~\ref{misabase}. The effective
-XLEN values in S-mode and U-mode are termed {\em SXLEN} and {\em UXLEN},
-respectively.
-
-For RV32 systems, the SXL and UXL fields do not exist, and
-SXLEN=32 and UXLEN=32.
-
-For RV64 systems, if S-mode is not supported, then SXL is read-only
-zero. Otherwise, it is a \warl\ field that encodes the current value of
-SXLEN. In particular, an implementation may make SXL be a read-only
-field whose value always ensures that SXLEN=MXLEN.
-
-For RV64 systems, if U-mode is not supported, then UXL is read-only
-zero. Otherwise, it is a \warl\ field that encodes the current value of
-UXLEN. In particular, an implementation may make UXL be a read-only
-field whose value always ensures that UXLEN=MXLEN or UXLEN=SXLEN.
-
-Whenever XLEN in any mode is set to a value less than the widest
-supported XLEN, all operations must ignore source operand register
-bits above the configured XLEN, and must sign-extend results to fill
-the entire widest supported XLEN in the destination register.
-Similarly, {\tt pc} bits above XLEN are ignored, and when the {\tt pc}
-is written, it is sign-extended to fill the widest supported XLEN.
-\begin{commentary}
-We require that operations always fill the entire underlying hardware
-registers with defined values to avoid implementation-defined
-behavior.
-
-To reduce hardware complexity, the architecture imposes no checks that
-lower-privilege modes have XLEN settings less than or equal to the
-next-higher privilege mode. In practice, such settings would almost
-always be a software bug, but machine operation is well-defined even in this
-case.
-\end{commentary}
-
-If MXLEN is changed from 32 to a wider width, each of {\tt mstatus} fields SXL and
-UXL, if not restricted to a single value, gets the value corresponding to the
-widest supported width not wider than the new MXLEN.
-
-\subsubsection{Memory Privilege in {\tt mstatus} Register}
-
-The MPRV (Modify PRiVilege) bit modifies the {\em effective privilege mode},
-i.e., the privilege level at which loads
-and stores execute. When MPRV=0, loads and stores
-behave as normal, using the translation and protection mechanisms of the
-current privilege mode.
-When MPRV=1, load and store memory addresses are translated and protected, and
-endianness is applied, as though the current privilege mode were set to MPP.
-Instruction address-translation and protection are unaffected by the setting
-of MPRV. MPRV is read-only 0 if U-mode is not supported.
-
-An MRET or SRET instruction that changes the privilege mode to a mode
-less privileged than M also sets MPRV=0.
-
-The MXR (Make eXecutable Readable) bit modifies the privilege with which loads
-access virtual memory. When MXR=0, only loads from pages marked readable (R=1
-in Figure~\ref{sv32pte}) will succeed. When MXR=1, loads from pages marked
-either readable or executable (R=1 or X=1) will succeed. MXR has no effect
-when page-based virtual memory is not in effect. MXR is read-only 0 if
-S-mode is not supported.
-
-\begin{commentary}
-The MPRV and MXR mechanisms were conceived to improve the efficiency of M-mode
-routines that emulate missing hardware features, e.g., misaligned loads and
-stores. MPRV obviates the need to perform address translation in software.
-MXR allows instruction words to be loaded from pages marked execute-only.
-
-The current privilege mode and the privilege mode specified by MPP might have
-different XLEN settings. When MPRV=1, load and store memory addresses are
-treated as though the current XLEN were set to MPP's XLEN, following the rules
-in Section~\ref{xlen-control}.
-\end{commentary}
-
-The SUM (permit Supervisor User Memory access) bit modifies the privilege with
-which S-mode loads and stores access virtual memory.
-When SUM=0, S-mode memory accesses to pages that are accessible by U-mode (U=1
-in Figure~\ref{sv32pte}) will fault. When SUM=1, these accesses are
-permitted. SUM has no effect when page-based virtual memory is not in effect.
-Note that, while SUM is ordinarily ignored when not executing in S-mode, it
-{\em is} in effect when MPRV=1 and MPP=S. SUM is read-only 0 if S-mode is
-not supported or if {\tt satp}.MODE is read-only~0.
-
-The MXR and SUM mechanisms only affect the interpretation of permissions
-encoded in page-table entries. In particular, they have no impact on whether
-access-fault exceptions are raised due to PMAs or PMP.
-
-\subsubsection{Endianness Control in {\tt mstatus} and {\tt mstatush} Registers}
-
-The MBE, SBE, and UBE bits in {\tt mstatus} and {\tt mstatush} are
-\warl\ fields that control the endianness of memory accesses other than
-instruction fetches.
-Instruction fetches are always little-endian.
-
-MBE controls whether non-instruction-fetch memory accesses made from
-M-mode (assuming {\tt mstatus}.MPRV=0) are little-endian (MBE=0) or
-big-endian (MBE=1).
-
-If S-mode is not supported, SBE is read-only~0.
-Otherwise, SBE controls whether explicit load and store memory accesses made
-from S-mode are little-endian (SBE=0) or big-endian (SBE=1).
-
-If U-mode is not supported, UBE is read-only~0.
-Otherwise, UBE controls whether explicit load and store memory accesses made
-from U-mode are little-endian (UBE=0) or big-endian (UBE=1).
-
-For {\em implicit} accesses to supervisor-level memory management data
-structures, such as page tables, endianness is always controlled by SBE.
-Since changing SBE alters the implementation's interpretation of these data
-structures, if any such data structures remain in use across a change to SBE,
-M-mode software must follow such a change to SBE by executing an
-SFENCE.VMA instruction with {\em rs1}={\tt x0} and {\em rs2}={\tt x0}.
-
-\begin{commentary}
-Only in contrived scenarios will a given memory-management data structure be
-interpreted as both little-endian and big-endian.
-In practice, SBE will only be changed at runtime on world switches, in which
-case neither the old nor new memory-management data structure will be
-reinterpreted in a different endianness.
-In this case, no additional SFENCE.VMA is necessary, beyond what would
-ordinarily be required for a world switch.
-\end{commentary}
-
-If S-mode is supported, an implementation may make SBE be a read-only
-copy of MBE.
-If U-mode is supported, an implementation may make UBE be a read-only
-copy of either MBE or SBE.
-
-\begin{commentary}
-An implementation supports only little-endian memory accesses if fields
-MBE, SBE, and UBE are all read-only~0.
-An implementation supports only big-endian memory accesses (aside from
-instruction fetches) if MBE is read-only 1 and SBE and UBE are each
-read-only 1 when S-mode and U-mode are supported.
-\end{commentary}
-
-\begin{commentary}
-Volume I defines a hart's address space as a circular sequence of
-$2^{\text{XLEN}}$ bytes at consecutive addresses.
-The correspondence between addresses and byte locations is fixed and not
-affected by any endianness mode.
-Rather, the applicable endianness mode determines the order of mapping
-between memory bytes and a multibyte quantity (halfword, word, etc.).
-\end{commentary}
-
-\begin{commentary}
-Standard RISC-V ABIs are expected to be purely little-endian-only or
-big-endian-only, with no accommodation for mixing endianness.
-Nevertheless, endianness control has been defined so as to permit, for
-instance, an OS of one endianness to execute user-mode programs of the
-opposite endianness.
-Consideration has been given also to the possibility of non-standard
-usages whereby software flips the endianness of memory accesses as
-needed.
-\end{commentary}
-
-\begin{commentary}
-RISC-V instructions are uniformly little-endian to decouple instruction
-encoding from the current endianness settings, for the benefit of both
-hardware and software.
-Otherwise, for instance, a RISC-V assembler or disassembler would always
-need to know the intended active endianness, even though the endianness
-mode might change dynamically during execution.
-In contrast, by giving instructions a fixed endianness, it is sometimes
-possible for carefully written software to be endianness-agnostic even in
-binary form, much like position-independent code.
-
-The choice to have instructions be only little-endian does have
-consequences, however, for RISC-V software that encodes or decodes
-machine instructions.
-In big-endian mode, such software must account for the fact that explicit
-loads and stores have endianness opposite that of instructions, for
-example by swapping byte order after loads and before stores.
-\end{commentary}
-
-\subsubsection{Virtualization Support in {\tt mstatus} Register}
-\label{virt-control}
-
-The TVM (Trap Virtual Memory) bit is a \warl\ field that supports intercepting
-supervisor virtual-memory management operations. When TVM=1,
-attempts to read or write the {\tt satp} CSR or execute an SFENCE.VMA or
-SINVAL.VMA instruction while executing in S-mode will raise an illegal instruction
-exception. When TVM=0, these operations are permitted in S-mode.
-TVM is read-only 0 when S-mode is not supported.
-
-\begin{commentary}
-The TVM mechanism improves virtualization efficiency by permitting guest
-operating systems to execute in S-mode, rather than classically virtualizing
-them in U-mode. This approach obviates the need to trap accesses to most
-S-mode CSRs.
-
-Trapping {\tt satp} accesses and the SFENCE.VMA and SINVAL.VMA instructions
-provides the hooks necessary to lazily populate shadow page tables.
-\end{commentary}
-
-The TW (Timeout Wait) bit is a \warl\ field that supports intercepting the WFI
-instruction (see Section~\ref{wfi}).
-When TW=0, the WFI instruction may execute in lower
-privilege modes when not prevented for some other reason. When TW=1,
-then if WFI is executed in any less-privileged mode, and it does not complete
-within an implementation-specific, bounded time limit, the WFI instruction
-causes an illegal instruction exception.
-An implementation may have WFI always raise an illegal instruction exception
-in less-privileged modes when TW=1, even if there are pending
-globally-disabled interrupts when the instruction is executed.
-TW is read-only 0 when there are no modes less privileged than
-M.
-
-\begin{commentary}
-Trapping the WFI
-instruction can trigger a world switch to another guest OS, rather than
-wastefully idling in the current guest.
-\end{commentary}
-
-When S-mode is implemented, then executing WFI in U-mode causes an illegal
-instruction exception, unless it completes within an implementation-specific,
-bounded time limit. A future revision of this specification might add
-a feature that allows S-mode to selectively permit WFI in U-mode. Such
-a feature would only be active when TW=0.
-
-The TSR (Trap SRET) bit is a \warl\ field that
-supports intercepting the supervisor exception return
-instruction, SRET. When TSR=1, attempts to execute SRET while executing in
-S-mode will raise an illegal instruction exception. When TSR=0, this
-operation is permitted in S-mode. TSR is read-only 0 when S-mode is not
-supported.
-
-\begin{commentary}
-Trapping SRET is necessary to emulate the hypervisor extension
-(see Chapter~\ref{hypervisor}) on implementations that do not provide it.
-\end{commentary}
-
-\subsubsection{Extension Context Status in {\tt mstatus} Register}
-
-Supporting substantial extensions is one of the primary goals of
-RISC-V, and hence we define a standard interface to allow unchanged
-privileged-mode code, particularly a supervisor-level OS, to support
-arbitrary user-mode state extensions.
-
-\begin{commentary}
- To date, the V extension is the only standard extension that defines
- additional state beyond the floating-point CSR and data registers.
-\end{commentary}
-
-The FS[1:0] and VS[1:0] \warl\ fields and the XS[1:0] read-only field are used
-to reduce the cost of context save and restore by setting and tracking
-the current state of the floating-point unit and any other user-mode
-extensions respectively.
-The FS field encodes the status of the floating-point unit state, including
-the floating-point registers {\tt f0}--{\tt f31} and the CSRs
-{\tt fcsr}, {\tt frm}, and {\tt fflags}.
-The VS field encodes the status of the vector extension state, including
-the vector registers {\tt v0}--{\tt v31} and the CSRs
-{\tt vcsr}, {\tt vxrm}, {\tt vxsat}, {\tt vstart}, {\tt vl}, {\tt vtype},
-and {\tt vlenb}.
-The XS field encodes the
-status of additional user-mode extensions and associated state.
-These fields can be checked by a context switch routine to quickly
-determine whether a state save or restore is required. If a save or
-restore is required, additional instructions and CSRs are typically
-required to effect and optimize the process.
-
-\begin{commentary}
- The design anticipates that most context switches will not need to
- save/restore state in either or both of the floating-point unit or
- other extensions, so provides a fast check via the SD bit.
-\end{commentary}
-
-The FS, VS, and XS fields use the same status encoding as shown in
-Table~\ref{fsxsencoding}, with the four possible status values being
-Off, Initial, Clean, and Dirty.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|r|l|l|}
-\hline
-Status & FS and VS Meaning & XS Meaning\\
-\hline
-0 & Off & All off \\
-1 & Initial & None dirty or clean, some on\\
-2 & Clean & None dirty, some clean \\
-3 & Dirty & Some dirty \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Encoding of FS[1:0], VS[1:0], and XS[1:0] status fields.}
-\label{fsxsencoding}
-\end{table*}
-
-If the F extension is implemented, the FS field shall not be
-read-only zero.
-
-If neither the F extension nor S-mode is implemented, then FS is
-read-only zero.
-If S-mode is implemented but the F extension is not, FS may optionally
-be read-only zero.
-
-\begin{commentary}
-Implementations with S-mode but without the F extension are
-permitted, but not required, to make the FS field be read-only zero.
-Some such implementations will choose {\em not} to have the FS
-field be read-only zero, so as to enable emulation of the F extension for
-both S-mode and U-mode via invisible traps into M-mode.
-\end{commentary}
-
-If the {\tt v} registers are implemented, the VS field shall not be
-read-only zero.
-
-If neither the {\tt v} registers nor S-mode is implemented, then VS
-is read-only zero.
-If S-mode is implemented but the {\tt v} registers are not, VS may
-optionally be read-only zero.
-
-In systems without additional user extensions requiring new state, the
-XS field is read-only zero. Every additional extension with state
-provides a CSR field that encodes the equivalent of the XS states.
-The XS field represents a summary of all
-extensions' status as shown in Table~\ref{fsxsencoding}.
-
-\begin{commentary}
-The XS field effectively reports the maximum status value across all
-user-extension status fields, though individual extensions can use a
-different encoding than XS.
-\end{commentary}
-
-The SD bit is a read-only bit that summarizes whether any of the FS,
-VS, or XS fields signals the presence of some dirty state that will
-require saving extended user context to memory. If FS, XS, and VS are all
-read-only zero, then SD is also always zero.
-
-When an extension's status is set to Off, any instruction that
-attempts to read or write the corresponding state will cause an illegal instruction
-exception. When the status is Initial, the corresponding state should
-have an initial constant value. When the status is Clean, the
-corresponding state is potentially different from the initial value,
-but matches the last value stored on a context swap. When the status
-is Dirty, the corresponding state has potentially been modified since
-the last context save.
-
-During a context save, the responsible privileged code need only write
-out the corresponding state if its status is Dirty, and can then reset
-the extension's status to Clean. During a context restore, the
-context need only be loaded from memory if the status is Clean (it
-should never be Dirty at restore). If the status is Initial, the
-context must be set to an initial constant value on context restore to
-avoid a security hole, but this can be done without accessing memory.
-For example, the floating-point registers can all be initialized to
-the immediate value 0.
-
-The FS and XS fields are read by the privileged code before saving the
-context. The FS field is set directly by privileged code when
-resuming a user context, while the XS field is set indirectly by
-writing to the status register of the individual extensions. The
-status fields will also be updated during execution of instructions,
-regardless of privilege mode.
-
-Extensions to the user-mode ISA often include additional user-mode
-state, and this state can be considerably larger than the base integer
-registers. The extensions might only be used for some applications,
-or might only be needed for short phases within a single application.
-To improve performance, the user-mode extension can define additional
-instructions to allow user-mode software to return the unit to an
-initial state or even to turn off the unit.
-
-For example, a coprocessor might need to be configured before use
-and can be ``unconfigured'' after use. The unconfigured state would
-be represented as the Initial state for context save. If the same
-application remains running between the unconfigure and the next
-configure (which would set status to Dirty), there is no need to
-actually reinitialize the state at the unconfigure instruction, as all
-state is local to the user process, i.e., the Initial state may only
-cause the coprocessor state to be initialized to a constant value at
-context restore, not at every unconfigure.
-
-Executing a user-mode instruction to disable a unit and place it into
-the Off state will cause an illegal instruction exception to be raised
-if any subsequent instruction tries to use the unit before it is
-turned back on. A user-mode instruction to turn a unit on must also
-ensure the unit's state is properly initialized, as the unit might
-have been used by another context in the meantime.
-
-Changing the setting of FS has no effect on the contents of the floating-point
-register state. In particular, setting FS=Off does not destroy the state, nor
-does setting FS=Initial clear the contents.
-Similarly, the setting of VS has no effect on the contents of the vector register
-state.
-Other extensions, however, might not preserve state when set to Off.
-
-Implementations may choose to track the dirtiness of the floating-point
-register state imprecisely by reporting the state to be dirty even when
-it has not been modified. On some implementations, some instructions that
-do not mutate the floating-point state may cause the state to transition from
-Initial or Clean to Dirty. On other implementations, dirtiness might not be
-tracked at all, in which case the valid FS states are Off and Dirty, and an
-attempt to set FS to Initial or Clean causes it to be set to Dirty.
-\begin{commentary}
-This definition of FS does not disallow setting FS to Dirty as a result of
-errant speculation. Some platforms may choose to disallow speculatively
-writing FS to close a potential side channel.
-\end{commentary}
-
-If an instruction explicitly or implicitly writes a floating-point register or
-the {\tt fcsr} but does not alter its contents, and FS=Initial or FS=Clean, it
-is implementation-defined whether FS transitions to Dirty.
-
-Implementations may choose to track the dirtiness of the vector register state in an
-analogous imprecise fashion, including possibly setting VS to Dirty when
-software attempts to set VS=Initial or VS=Clean.
-When VS=Initial or VS=Clean, it is implementation-defined whether an
-instruction that writes a vector register or vector CSR but does not alter its
-contents causes VS to transition to Dirty.
-
-Table~\ref{fsxsstates} shows all the possible state transitions for
-the FS, VS, or XS status bits. Note that the standard floating-point
-and vector extensions do not support user-mode unconfigure or disable/enable
-instructions.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|l|l|l|l|l|}
-\hline
-\multicolumn{1}{|r|}{Current State} & Off & Initial & Clean & Dirty \\
-Action & & & &\\
-\hline
-\hline
-\multicolumn{5}{|c|}{At context save in privileged code}\\
-\hline
-Save state? & No & No & No & Yes \\
-Next state & Off & Initial & Clean & Clean \\
-\hline
-\hline
-\multicolumn{5}{|c|}{At context restore in privileged code}\\
-\hline
-Restore state? & No & Yes, to initial & Yes, from memory & N/A \\
-Next state & Off & Initial & Clean & N/A \\
-\hline
-\hline
-\multicolumn{5}{|c|}{Execute instruction to read state}\\
-\hline
-Action? & Exception & Execute & Execute & Execute \\
-Next state & Off & Initial & Clean & Dirty \\
-\hline
-\hline
-\multicolumn{5}{|c|}{Execute instruction that possibly modifies state, including configuration}\\
-\hline
-Action? & Exception & Execute & Execute & Execute \\
-Next state & Off & Dirty & Dirty & Dirty \\
-\hline
-\hline
-\multicolumn{5}{|c|}{Execute instruction to unconfigure unit}\\
-\hline
-Action? & Exception & Execute & Execute & Execute \\
-Next state & Off & Initial & Initial & Initial \\
-\hline
-\hline
-\multicolumn{5}{|c|}{Execute instruction to disable unit}\\
-\hline
-Action? & Execute & Execute & Execute & Execute \\
-Next state & Off & Off & Off & Off \\
-\hline
-\hline
-\multicolumn{5}{|c|}{Execute instruction to enable unit}\\
-\hline
-Action? & Execute & Execute & Execute & Execute \\
-Next state & Initial & Initial & Initial & Initial \\
-\hline
-\end{tabular}
-\end{center}
-\caption{FS, VS, and XS state transitions.}
-\label{fsxsstates}
-\end{table*}
-
-Standard privileged instructions to initialize, save, and restore
-extension state are provided to insulate privileged code from details
-of the added extension state by treating the state as an opaque
-object.
-
-\begin{commentary}
-Many coprocessor extensions are only used in limited contexts that
-allow software to safely unconfigure or even disable units when done.
-This reduces the context-switch overhead of large stateful
-coprocessors.
-
-We separate out floating-point state from other extension state, as
-when a floating-point unit is present the floating-point registers are
-part of the standard calling convention, and so user-mode software
-cannot know when it is safe to disable the floating-point unit.
-\end{commentary}
-
-The XS field provides a summary of all added extension state, but
-additional microarchitectural bits might be maintained in the
-extension to further reduce context save and restore overhead.
-
-The SD bit is read-only and is set when either the FS, VS, or XS bits
-encode a Dirty state (i.e., SD=((FS==11) OR (XS==11) OR (VS==11))). This allows
-privileged code to quickly determine when no additional context save is
-required beyond the integer register set and {\tt pc}.
-
-The floating-point unit state is always initialized, saved, and
-restored using standard instructions (F, D, and/or Q), and privileged
-code must be aware of FLEN to determine the appropriate space to
-reserve for each {\tt f} register.
-
-Machine and Supervisor modes share a single copy of the FS, VS, and XS bits.
-Supervisor-level software
-normally uses the FS, VS, and XS bits directly to record the status with
-respect to the supervisor-level saved context.
-Machine-level software must be more conservative in saving and restoring the
-extension state in its corresponding version of the context.
-
-\begin{commentary}
-In any reasonable use case, the number of context switches between
-user and supervisor level should far outweigh the number of context
-switches to other privilege levels. Note that coprocessors should not
-require their context to be saved and restored to service asynchronous
-interrupts, unless the interrupt results in a user-level context swap.
-\end{commentary}
-
-\subsection{Machine Trap-Vector Base-Address Register ({\tt mtvec})}
-
-The {\tt mtvec} register is an MXLEN-bit \warl\ read/write register that holds
-trap vector configuration, consisting of a vector base address (BASE) and a
-vector mode (MODE).
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{J@{}S}
-\instbitrange{MXLEN-1}{2} &
-\instbitrange{1}{0} \\
-\hline
-\multicolumn{1}{|c|}{BASE[MXLEN-1:2] (\warl)} &
-\multicolumn{1}{c|}{MODE (\warl)} \\
-\hline
-MXLEN-2 & 2 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine trap-vector base-address register ({\tt mtvec}).}
-\label{mtvecreg}
-\end{figure*}
-
-The {\tt mtvec} register must always be implemented, but can contain
-a read-only value. If {\tt mtvec} is writable, the set of values
-the register may hold can vary by implementation. The value in the BASE field
-must always be aligned on a 4-byte boundary, and the MODE setting may impose
-additional alignment constraints on the value in the BASE field.
-
-\begin{commentary}
-We allow for considerable flexibility in implementation of the trap
-vector base address. On the one hand, we do not wish to burden low-end
-implementations with a large number of state bits, but on the other
-hand, we wish to allow flexibility for larger systems.
-\end{commentary}
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|r|c|l|}
-\hline
-Value & Name & Description \\
-\hline
-0 & Direct & All exceptions set {\tt pc} to BASE. \\
-1 & Vectored & Asynchronous interrupts set {\tt pc} to BASE+4$\times$cause. \\
-$\ge$2 & --- & {\em Reserved} \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Encoding of {\tt mtvec} MODE field.}
-\label{mtvec-mode}
-\end{table*}
-
-The encoding of the MODE field is shown in Table~\ref{mtvec-mode}. When
-MODE=Direct, all traps into machine mode cause the {\tt pc} to be set to the
-address in the BASE field. When MODE=Vectored, all synchronous exceptions
-into machine mode cause the {\tt pc} to be set to the address in the BASE
-field, whereas interrupts cause the {\tt pc} to be set to the address in
-the BASE field plus four times the interrupt cause number. For example,
-a machine-mode timer interrupt (see Table~\ref{mcauses} on page~\pageref{mcauses}) causes the {\tt pc}
-to be set to BASE+{\tt 0x1c}.
-
-\begin{commentary}
-When vectored interrupts are enabled, interrupt cause 0, which corresponds to
-user-mode software interrupts, is vectored to the same location as
-synchronous exceptions. This ambiguity does not arise in practice, since
-user-mode software interrupts are either disabled or delegated to user mode.
-\end{commentary}
-
-An implementation may have different alignment constraints for
-different modes. In particular, MODE=Vectored may have stricter
-alignment constraints than MODE=Direct.
-
-\begin{commentary}
- Allowing coarser alignments in Vectored mode enables vectoring to be
- implemented without a hardware adder circuit.
-\end{commentary}
-
-\begin{commentary}
-Reset and NMI vector locations are given in a platform specification.
-\end{commentary}
-
-\subsection{Machine Trap Delegation Registers ({\tt medeleg} and {\tt mideleg})}
-
-By default, all traps at any privilege level are handled in machine
-mode, though a machine-mode handler can redirect traps back to the
-appropriate level with the MRET instruction (Section~\ref{otherpriv}).
-To increase performance, implementations can provide individual
-read/write bits within {\tt medeleg} and {\tt mideleg} to indicate
-that certain exceptions and interrupts should be processed directly by
-a lower privilege level. The machine exception delegation register
-({\tt medeleg}) and machine interrupt delegation register ({\tt
- mideleg}) are MXLEN-bit read/write registers.
-
-In systems with S-mode, the {\tt medeleg} and {\tt mideleg} registers
-must exist, and setting a bit in
-{\tt medeleg} or {\tt mideleg} will delegate the corresponding trap, when
-occurring in S-mode or U-mode, to the S-mode trap handler.
-In systems without S-mode, the {\tt medeleg} and {\tt mideleg} registers
-should not exist.
-
-\begin{commentary}
- In versions 1.9.1 and earlier, these registers existed but were
- hardwired to zero in M-mode only, or M/U without N systems. There
- is no reason to require they return zero in those cases, as the {\tt
- misa} register indicates whether they exist.
-\end{commentary}
-
-
-When a trap is delegated to S-mode, the
-{\tt scause} register is written with the trap cause; the
-{\tt sepc} register is written with the virtual address of
-the instruction that took the trap; the
-{\tt stval} register is written with an
-exception-specific datum; the SPP field
-of {\tt mstatus} is written with the active privilege mode at the time of
-the trap; the SPIE field of {\tt mstatus} is written with the
-value of the SIE field at the time of the trap; and
-the SIE field of {\tt mstatus} is cleared.
-The {\tt mcause}, {\tt mepc}, and {\tt mtval} registers and the MPP and
-MPIE fields of {\tt mstatus} are not written.
-
-An
-implementation can choose to subset the delegatable traps, with the
-supported delegatable bits found by writing one to every bit location,
-then reading back the value in {\tt medeleg} or {\tt mideleg} to see
-which bit positions hold a one.
-
-An implementation shall not have any bits of {\tt medeleg} be read-only one, i.e.,
-any synchronous trap that can be delegated must support not being delegated.
-Similarly, an implementation shall not fix as read-only one any bits of
-{\tt mideleg} corresponding to machine-level interrupts (but may do so
-for lower-level interrupts).
-
-\begin{commentary}
-Version 1.11 and earlier prohibited having any bits of {\tt mideleg}
-be read-only one.
-Platform standards may always add such restrictions.
-\end{commentary}
-
-Traps never transition from a more-privileged mode to a less-privileged mode.
-For example, if M-mode has delegated illegal instruction exceptions to S-mode, and
-M-mode software later executes an illegal instruction, the trap is taken in
-M-mode, rather than being delegated to S-mode. By contrast, traps may be
-taken horizontally. Using the same example, if M-mode has delegated illegal
-instruction exceptions to S-mode, and S-mode software later executes an illegal
-instruction, the trap is taken in S-mode.
-
-Delegated interrupts result in the interrupt being masked at the delegator
-privilege level. For example, if the supervisor timer interrupt (STI) is
-delegated to S-mode by setting {\tt mideleg}[5], STIs will not be taken when
-executing in M-mode. By contrast, if {\tt mideleg}[5] is clear, STIs can
-be taken in any mode and regardless of current mode will transfer control to
-M-mode.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}U}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Synchronous Exceptions (\warl)} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine Exception Delegation Register {\tt medeleg}.}
-\label{medelegreg}
-\end{figure}
-
-{\tt medeleg} has a bit position allocated for every synchronous exception
-shown in Table~\ref{mcauses} on page~\pageref{mcauses}, with the index of the bit position equal to the
-value returned in the {\tt mcause} register (e.g., setting bit 8 allows
-user-mode environment calls to be delegated to a lower-privilege trap
-handler).
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}U}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Interrupts (\warl)} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine Interrupt Delegation Register {\tt mideleg}.}
-\label{midelegreg}
-\end{figure}
-
-{\tt mideleg} holds trap delegation bits for individual interrupts, with the
-layout of bits matching those in the {\tt mip} register (e.g., STIP interrupt
-delegation control is located in bit 5).
-
-For exceptions that cannot occur in less privileged modes, the corresponding
-{\tt medeleg} bits should be read-only zero. In particular,
-{\tt medeleg}[11] is read-only zero.
-
-\subsection{Machine Interrupt Registers ({\tt mip} and {\tt mie})}
-
-The {\tt mip} register is an MXLEN-bit read/write register containing
-information on pending interrupts, while {\tt mie} is the
-corresponding MXLEN-bit read/write register containing interrupt enable
-bits.
-Interrupt cause number \textit{i} (as reported in CSR {\tt mcause},
-Section~\ref{sec:mcause}) corresponds with bit~\textit{i} in both
-{\tt mip} and {\tt mie}.
-Bits 15:0 are allocated to standard interrupt causes only, while bits 16
-and above are designated for platform or custom use.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}U}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Interrupts (\warl)} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine Interrupt-Pending Register ({\tt mip}).}
-\label{mipreg}
-\end{figure}
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}U}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Interrupts (\warl)} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine Interrupt-Enable Register ({\tt mie}).}
-\label{miereg}
-\end{figure}
-
-An interrupt~\textit{i} will trap to M-mode (causing the privilege mode
-to change to M-mode) if all of the following are true:
-(a)~either the current privilege mode is M and the MIE bit in the
-{\tt mstatus} register is set, or the current privilege mode has less
-privilege than M-mode;
-(b)~bit~\textit{i} is set in both {\tt mip} and {\tt mie}; and
-(c)~if register {\tt mideleg} exists, bit~\textit{i} is not set in
-{\tt mideleg}.
-
-These conditions for an interrupt trap to occur must be evaluated in a bounded
-amount of time from when an interrupt becomes, or ceases to be,
-pending in {\tt mip}, and must
-also be evaluated immediately following the execution of an {\em x}\/RET
-instruction or an explicit write to a CSR on which these interrupt trap
-conditions expressly depend (including {\tt mip}, {\tt mie}, {\tt mstatus},
-and {\tt mideleg}).
-
-Interrupts to M-mode take priority over any interrupts to lower privilege
-modes.
-
-Each individual bit in register {\tt mip} may be writable or may be
-read-only.
-When bit~\textit{i} in {\tt mip} is writable, a pending interrupt
-\textit{i} can be cleared by writing 0 to this bit.
-If interrupt \textit{i} can become pending but bit~\textit{i} in
-{\tt mip} is read-only, the implementation must provide some other
-mechanism for clearing the pending interrupt.
-
-A bit in {\tt mie} must be writable if the corresponding interrupt can
-ever become pending.
-Bits of {\tt mie} that are not writable must be read-only zero.
-
-The standard portions (bits 15:0) of registers {\tt mip} and {\tt mie}
-are formatted as shown in Figures \ref{mipreg-standard} and
-\ref{miereg-standard} respectively.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{Rcccccccccccc}
-\instbitrange{15}{12} &
-\instbit{11} &
-\instbit{10} &
-\instbit{9} &
-\instbit{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{0} &
-\multicolumn{1}{c|}{MEIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{SEIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{MTIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{STIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{MSIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{SSIP} &
-\multicolumn{1}{c|}{0} \\
-\hline
-4 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Standard portion (bits 15:0) of {\tt mip}.}
-\label{mipreg-standard}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{Rcccccccccccc}
-\instbitrange{15}{12} &
-\instbit{11} &
-\instbit{10} &
-\instbit{9} &
-\instbit{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{0} &
-\multicolumn{1}{c|}{MEIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{SEIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{MTIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{STIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{MSIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{SSIE} &
-\multicolumn{1}{c|}{0} \\
-\hline
-4 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Standard portion (bits 15:0) of {\tt mie}.}
-\label{miereg-standard}
-\end{figure*}
-
-\begin{commentary}
- The machine-level interrupt registers handle a few root interrupt
- sources which are assigned a fixed service priority for simplicity,
- while separate external interrupt controllers can implement a more
- complex prioritization scheme over a much larger set of interrupts
- that are then muxed into the machine-level interrupt sources.
-\end{commentary}
-
-\begin{commentary}
-The non-maskable interrupt is not made visible via the {\tt mip}
-register as its presence is implicitly known when executing the NMI
-trap handler.
-\end{commentary}
-
-Bits {\tt mip}.MEIP and {\tt mie}.MEIE are the interrupt-pending and
-interrupt-enable bits for machine-level external interrupts.
-MEIP is read-only in {\tt mip}, and is set and cleared by a
-platform-specific interrupt controller.
-
-Bits {\tt mip}.MTIP and {\tt mie}.MTIE are the interrupt-pending and
-interrupt-enable bits for machine timer interrupts.
-MTIP is read-only in {\tt mip}, and is cleared by writing to the memory-mapped
-machine-mode timer compare register.
-
-Bits {\tt mip}.MSIP and {\tt mie}.MSIE are the interrupt-pending and
-interrupt-enable bits for machine-level software interrupts.
-MSIP is read-only in {\tt mip}, and is written by accesses
-to memory-mapped control registers, which are used by remote harts to
-provide machine-level interprocessor interrupts.
-A hart can write its
-own MSIP bit using the same memory-mapped control register.
-If a system has only one hart, or if a platform standard supports the
-delivery of machine-level interprocessor interrupts through external
-interrupts (MEI) instead, then {\tt mip}.MSIP and {\tt mie}.MSIE may
-both be read-only zeros.
-
-If supervisor mode is not implemented, bits SEIP, STIP, and SSIP of
-{\tt mip} and SEIE, STIE, and SSIE of {\tt mie} are read-only zeros.
-
-If supervisor mode is implemented, bits {\tt mip}.SEIP and {\tt mie}.SEIE
-are the interrupt-pending and interrupt-enable bits for supervisor-level
-external interrupts.
-SEIP is writable in {\tt mip}, and
-may be written by M-mode software to indicate to S-mode that an
-external interrupt is pending. Additionally, the platform-level
-interrupt controller may generate supervisor-level external interrupts.
-Supervisor-level external interrupts are made pending based on the
-logical-OR of the software-writable SEIP bit and the signal from the
-external interrupt controller.
-When {\tt mip} is read with a CSR instruction,
-the value of the SEIP bit returned in the {\tt rd} destination
-register is the logical-OR of the software-writable bit and the
-interrupt signal from the interrupt controller, but the signal from the
-interrupt controller is not used to calculate the value written to SEIP.
-Only the software-writable SEIP bit participates in the
-read-modify-write sequence of a CSRRS or CSRRC instruction.
-
-\begin{commentary}
- For example, if we name the software-writable SEIP bit {\tt B} and the
- signal from the external interrupt controller {\tt E}, then if \mbox{\tt csrrs
- t0, mip, t1} is executed, {\tt t0[9]} is written with \mbox{\tt B || E}, then
- {\tt B} is written with \mbox{\tt B || t1[9]}.
- If \mbox{\tt csrrw t0, mip, t1} is executed, then {\tt t0[9]} is written with
- \mbox{\tt B || E}, and {\tt B} is simply written with {\tt t1[9]}.
- In neither case does {\tt B} depend upon {\tt E}.
-
- The SEIP field behavior is designed to allow a higher privilege
- layer to mimic external interrupts cleanly, without losing any real
- external interrupts. The behavior of the CSR instructions is
- slightly modified from regular CSR accesses as a result.
-\end{commentary}
-
-If supervisor mode is implemented, bits {\tt mip}.STIP and {\tt mie}.STIE
-are the interrupt-pending and interrupt-enable bits for supervisor-level
-timer interrupts.
-STIP is writable in {\tt mip}, and may be
-written by M-mode software to deliver timer interrupts to S-mode.
-
-If supervisor mode is implemented, bits {\tt mip}.SSIP and {\tt mie}.SSIE
-are the interrupt-pending and interrupt-enable bits for supervisor-level
-software interrupts.
-SSIP is writable in {\tt mip} and may also be set to 1 by a platform-specific
-interrupt controller.
-
-Multiple simultaneous
-interrupts destined for M-mode are handled in the following
-decreasing priority order: MEI, MSI, MTI, SEI, SSI, STI.
-
-\begin{commentary}
- The machine-level interrupt fixed-priority ordering rules were developed
- with the following rationale.
-
- Interrupts for higher privilege modes must be serviced before
- interrupts for lower privilege modes to support preemption.
-
- The platform-specific machine-level interrupt sources in bits 16 and above
- have platform-specific priority, but are typically chosen to have the
- highest service priority to support very fast local vectored interrupts.
-
- External interrupts are handled before internal (timer/software)
- interrupts as external interrupts are usually generated by devices
- that might require low interrupt service times.
-
- Software interrupts are handled before internal timer interrupts,
- because internal timer interrupts are usually intended for time
- slicing, where time precision is less important, whereas software
- interrupts are used for inter-processor messaging. Software
- interrupts can be avoided when high-precision timing is required, or
- high-precision timer interrupts can be routed via a different
- interrupt path.
- Software interrupts are located in the lowest four bits of {\tt mip}
- as these are often written by software, and this position allows the
- use of a single CSR instruction with a five-bit immediate.
-\end{commentary}
-
-Restricted views of the {\tt mip} and {\tt mie} registers appear as
-the {\tt sip} and {\tt sie} registers for supervisor level.
-If an interrupt is delegated to
-S-mode by setting a bit in the {\tt mideleg} register,
-it becomes visible in the {\tt sip} register and is maskable
-using the {\tt sie} register. Otherwise, the corresponding
-bits in {\tt sip} and {\tt sie} are read-only
-zero.
-
-\subsection{Hardware Performance Monitor}
-
-M-mode includes a basic hardware performance-monitoring facility. The
-{\tt mcycle} CSR counts the number of clock cycles executed by the
-processor core on which the hart is running.
-The {\tt minstret} CSR counts the number of instructions the hart has
-retired. The {\tt mcycle} and {\tt minstret} registers have 64-bit
-precision on all RV32 and RV64 systems.
-
-The counter registers have an arbitrary value after the hart is reset, and
-can be written with a given value. Any CSR write takes effect after
-the writing instruction has otherwise completed.
-The {\tt mcycle} CSR may be shared between harts on the same core, in which
-case writes to {\tt mcycle} will be visible to those harts.
-The platform should provide a mechanism to indicate which harts share an {\tt
-mcycle} CSR.
-
-The hardware performance monitor includes 29 additional 64-bit event counters, {\tt
-mhpmcounter3}--{\tt mhpmcounter31}. The event selector CSRs, {\tt
-mhpmevent3}--{\tt mhpmevent31}, are MXLEN-bit \warl\ registers that control which event
-causes the corresponding counter to increment. The meaning of these events is
-defined by the platform, but event 0 is defined to mean ``no event.''
-All counters should be implemented, but a legal implementation is to make
-both the counter and its corresponding event selector be read-only~0.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}K@{}W@{}K}
-\instbitrange{63}{0} \\ \cline{1-1}
-\multicolumn{1}{|c|}{\tt mcycle} \\ \cline{1-1}
-\multicolumn{1}{|c|}{\tt minstret} \\ \cline{1-1}
- & & \instbitrange{MXLEN-1}{0} \\ \cline{1-1}\cline{3-3}
-\multicolumn{1}{|c|}{\tt mhpmcounter3} & & \multicolumn{1}{|c|}{\tt mhpmevent3} \\ \cline{1-1}\cline{3-3}
-\multicolumn{1}{|c|}{\tt mhpmcounter4} & & \multicolumn{1}{|c|}{\tt mhpmevent4} \\ \cline{1-1}\cline{3-3}
-\multicolumn{1}{c}{\vdots} & & \multicolumn{1}{c}{\vdots} \\ \cline{1-1}\cline{3-3}
-\multicolumn{1}{|c|}{\tt mhpmcounter30} & & \multicolumn{1}{|c|}{\tt mhpmevent30} \\ \cline{1-1}\cline{3-3}
-\multicolumn{1}{|c|}{\tt mhpmcounter31} & & \multicolumn{1}{|c|}{\tt mhpmevent31} \\ \cline{1-1}\cline{3-3}
-64 & & MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Hardware performance monitor counters.}
-\end{figure}
-
-The {\tt mhpmcounter}s are \warl\ registers that support up to 64 bits of
-precision on RV32 and RV64.
-
-\begin{commentary}
-A future revision of this specification will define a mechanism to generate an
-interrupt when a hardware performance monitor counter overflows.
-\end{commentary}
-
-When MXLEN=32, reads of the {\tt mcycle}, {\tt minstret}, and {\tt
-mhpmcounter{\em n}} CSRs return bits 31--0 of the corresponding counter, and
-writes change only bits 31--0; reads of the {\tt mcycleh}, {\tt minstreth},
-and {\tt mhpmcounter{\em n}h} CSRs return bits 63--32 of the corresponding
-counter, and writes change only bits 63--32.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}K}
-\instbitrange{31}{0} \\ \hline
-\multicolumn{1}{|c|}{\tt mcycleh} \\ \hline
-\multicolumn{1}{|c|}{\tt minstreth} \\ \hline
-\multicolumn{1}{|c|}{\tt mhpmcounter3h} \\ \hline
-\multicolumn{1}{|c|}{\tt mhpmcounter4h} \\ \hline
-\multicolumn{1}{c}{\vdots} \\ \hline
-\multicolumn{1}{|c|}{\tt mhpmcounter30h} \\ \hline
-\multicolumn{1}{|c|}{\tt mhpmcounter31h} \\ \hline
-32 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Upper 32 bits of hardware performance monitor counters, RV32 only.}
-\end{figure}
-
-%On RV128 systems, the 64-bit values in {\tt mcycle}, {\tt minstret}, and
-%{\tt mhpmcounter{\em n}} are sign-extended to 128-bits when read.
-%\begin{commentary}
-%On RV128 systems, both signed and unsigned 64-bit values are held in a
-%canonical form with bit 63 repeated in all higher bit positions. The
-%counters are 64-bit values even in RV128, and so the counter CSR reads
-%preserve the sign-extension invariant. Implementations may choose to
-%implement fewer bits of the counters, provided software would be unlikely
-%to experience wraparound (e.g., $2^{63}$ instructions executed)
-%and thereby avoid having to actually implement the sign-extension
-%circuitry.
-%\end{commentary}
-
-\subsection{Machine Counter-Enable Register ({\tt mcounteren})}
-\label{sec:mcounteren}
-
-The counter-enable register {\tt mcounteren} is a 32-bit register that
-controls the availability of the hardware performance-monitoring counters to
-the next-lowest privileged mode.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{cccMcccccc}
-\instbit{31} &
-\instbit{30} &
-\instbit{29} &
-\instbitrange{28}{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{HPM31} &
-\multicolumn{1}{c|}{HPM30} &
-\multicolumn{1}{c|}{HPM29} &
-\multicolumn{1}{c|}{...} &
-\multicolumn{1}{c|}{HPM5} &
-\multicolumn{1}{c|}{HPM4} &
-\multicolumn{1}{c|}{HPM3} &
-\multicolumn{1}{c|}{IR} &
-\multicolumn{1}{c|}{TM} &
-\multicolumn{1}{c|}{CY} \\
-\hline
-1 & 1 & 1 & 23 & 1 & 1 & 1 & 1 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Counter-enable register ({\tt mcounteren}).}
-\label{mcounteren}
-\end{figure*}
-
-The settings in this register only control accessibility. The act
-of reading or writing this register does not affect the underlying
-counters, which continue to increment even when not accessible.
-
-When the CY, TM, IR, or HPM{\em n} bit in the {\tt mcounteren}
-register is clear, attempts to read the {\tt cycle}, {\tt time}, {\tt
- instret}, or {\tt hpmcounter{\em n}} register while executing in
-S-mode or U-mode will cause an illegal instruction exception. When
-one of these bits is set, access to the corresponding register is
-permitted in the next implemented privilege mode (S-mode if
-implemented, otherwise U-mode).
-
-\begin{commentary}
-The counter-enable bits support two common use cases with minimal hardware.
-For systems that do not need high-performance timers and counters,
-machine-mode software can trap accesses and implement all features in
-software. For systems that need high-performance timers and counters
-but are not concerned with obfuscating the underlying hardware
-counters, the counters can be directly exposed to lower privilege modes.
-\end{commentary}
-
-The {\tt cycle}, {\tt instret}, and {\tt hpmcounter{\em n}} CSRs are
-read-only shadows of {\tt mcycle}, {\tt minstret}, and {\tt mhpmcounter{\em
-n}}, respectively. The {\tt time} CSR is a read-only shadow of the
-memory-mapped {\tt mtime} register. Analogously, on RV32I the {\tt cycleh},
-{\tt instreth} and {\tt hpmcounter{\em n}h} CSRs are read-only shadows of
-{\tt mcycleh}, {\tt minstreth} and {\tt mhpmcounter{\em n}h}, respectively.
-On RV32I the {\tt timeh} CSR is a read-only shadow of the upper 32 bits of
-the memory-mapped {\tt mtime} register, while {\tt time} shadows only the
-lower 32 bits of {\tt mtime}.
-\begin{commentary}
-Implementations can convert reads of the {\tt time} and {\tt timeh} CSRs
-into loads to the memory-mapped {\tt mtime} register, or emulate this
-functionality on behalf of less-privileged modes in M-mode software.
-\end{commentary}
-
-In systems with U-mode, the {\tt mcounteren} register must be implemented, but all
-fields are \warl\ and may be read-only zero,
-indicating reads to the corresponding counter will
-cause an illegal instruction exception when executing in a less-privileged mode.
-In systems without U-mode, the {\tt mcounteren} register should not exist.
-
-\subsection{Machine Counter-Inhibit CSR ({\tt mcountinhibit})}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{cccMcccccc}
-\instbit{31} &
-\instbit{30} &
-\instbit{29} &
-\instbitrange{28}{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{HPM31} &
-\multicolumn{1}{c|}{HPM30} &
-\multicolumn{1}{c|}{HPM29} &
-\multicolumn{1}{c|}{...} &
-\multicolumn{1}{c|}{HPM5} &
-\multicolumn{1}{c|}{HPM4} &
-\multicolumn{1}{c|}{HPM3} &
-\multicolumn{1}{c|}{IR} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{CY} \\
-\hline
-1 & 1 & 1 & 23 & 1 & 1 & 1 & 1 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Counter-inhibit register {\tt mcountinhibit}.}
-\label{mcountinhibit}
-\end{figure*}
-
-The counter-inhibit register {\tt mcountinhibit} is a 32-bit \warl\ register
-that controls which of the hardware performance-monitoring counters increment.
-The settings in this register only control whether the counters increment;
-their accessibility is not affected by the setting of this register.
-
-When the CY, IR, or HPM{\em n} bit in the {\tt mcountinhibit} register is
-clear, the {\tt cycle}, {\tt instret}, or {\tt hpmcounter{\em n}} register
-increments as usual. When the CY, IR, or HPM{\em n} bit is set, the
-corresponding counter does not increment.
-
-The {\tt mcycle} CSR may be shared between harts on the same core, in which
-case the {\tt mcountinhibit}.CY field is also shared between those harts,
-and so writes to {\tt mcountinhibit}.CY will be visible to those harts.
-
-If the {\tt mcountinhibit} register is not implemented, the implementation
-behaves as though the register were set to zero.
-
-\begin{commentary}
-When the {\tt cycle} and {\tt instret} counters are not needed, it is
-desirable to conditionally inhibit them to reduce energy consumption.
-Providing a single CSR to inhibit all counters also allows the counters to be
-atomically sampled.
-
-Because the {\tt time} counter can be shared between multiple cores, it
-cannot be inhibited with the {\tt mcountinhibit} mechanism.
-\end{commentary}
-
-\subsection{Machine Scratch Register ({\tt mscratch})}
-
-The {\tt mscratch} register is an MXLEN-bit read/write register
-dedicated for use by machine mode. Typically, it is used to hold a
-pointer to a machine-mode hart-local context space and swapped with a
-user register upon entry to an M-mode trap handler.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt mscratch} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine-mode scratch register.}
-\label{mscratchreg}
-\end{figure}
-
-\begin{commentary}
-The MIPS ISA allocated two user registers ({\tt k0}/{\tt k1}) for use
-by the operating system. Although the MIPS scheme provides a fast and
-simple implementation, it also reduces available user registers, and
-does not scale to further privilege levels, or nested traps. It can
-also require that both registers be cleared before returning to user level
-to avoid a potential security hole and to provide deterministic
-debugging behavior.
-
-The RISC-V user ISA was designed to support many possible privileged
-system environments and so we did not want to infect the user-level
-ISA with any OS-dependent features. The RISC-V CSR swap instructions
-can quickly save/restore values to the {\tt mscratch} register.
-Unlike the MIPS design, the OS can rely on holding a value in the {\tt
- mscratch} register while the user context is running.
-\end{commentary}
-
-\subsection{Machine Exception Program Counter ({\tt mepc})}
-
-{\tt mepc} is an MXLEN-bit read/write register formatted as shown in
-Figure~\ref{mepcreg}. The low bit of {\tt mepc} ({\tt mepc[0]}) is
-always zero. On implementations that support only IALIGN=32, the two low bits
-({\tt mepc[1:0]}) are always zero.
-
-If an implementation allows IALIGN to be either 16 or 32 (by
-changing CSR {\tt misa}, for example), then, whenever IALIGN=32, bit
-{\tt mepc[1]} is masked on reads so that it appears to be 0. This
-masking occurs also for the implicit read by the MRET instruction.
-Though masked, {\tt mepc[1]} remains writable when IALIGN=32.
-
-{\tt mepc} is a \warl\ register that must be able to hold all valid
-virtual addresses. It need not be capable of holding all possible invalid
-addresses.
-Prior to writing {\tt mepc}, implementations may convert an invalid address
-into some other invalid address that {\tt mepc} is capable of holding.
-
-\begin{commentary}
-When address translation is not in effect, virtual addresses and physical
-addresses are equal.
-Hence, the set of addresses {\tt mepc} must be able to represent includes the
-set of physical addresses that can be used as a valid {\tt pc} or effective
-address.
-\end{commentary}
-
-When a trap is taken into M-mode, {\tt mepc} is written with the
-virtual address of the instruction that was interrupted or that
-encountered the exception. Otherwise, {\tt mepc} is never written by
-the implementation, though it may be explicitly written by software.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt mepc} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine exception program counter register.}
-\label{mepcreg}
-\end{figure}
-
-\subsection{Machine Cause Register ({\tt mcause})}
-\label{sec:mcause}
-
-The {\tt mcause} register is an MXLEN-bit read-write register formatted as
-shown in Figure~\ref{mcausereg}. When a trap is taken into M-mode, {\tt
-mcause} is written with a code indicating the event that caused the trap.
-Otherwise, {\tt mcause} is never written by the implementation, though it may be
-explicitly written by software.
-
-The Interrupt bit in the {\tt mcause} register is set if the
-trap was caused by an interrupt. The Exception Code field
- contains a code identifying the last exception or interrupt. Table~\ref{mcauses}
-lists the possible machine-level exception codes. The Exception Code
-is a \wlrl\ field, so is only guaranteed to hold supported exception
-codes.
-
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{c@{}U}
-\instbit{MXLEN-1} &
-\instbitrange{MXLEN-2}{0} \\
-\hline
-\multicolumn{1}{|c|}{Interrupt} &
-\multicolumn{1}{c|}{Exception Code (\wlrl)} \\
-\hline
-1 & MXLEN-1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine Cause register {\tt mcause}.}
-\label{mcausereg}
-\end{figure*}
-
-\begin{table*}[htbp]
-\begin{center}
-\begin{tabular}{|r|r|l|}
-
- \hline
- Interrupt & Exception Code & Description \\
- \hline
- 1 & 0 & {\em Reserved} \\
- 1 & 1 & Supervisor software interrupt \\
- 1 & 2 & {\em Reserved} \\
- 1 & 3 & Machine software interrupt \\ \hline
- 1 & 4 & {\em Reserved} \\
- 1 & 5 & Supervisor timer interrupt \\
- 1 & 6 & {\em Reserved} \\
- 1 & 7 & Machine timer interrupt \\ \hline
- 1 & 8 & {\em Reserved} \\
- 1 & 9 & Supervisor external interrupt \\
- 1 & 10 & {\em Reserved} \\
- 1 & 11 & Machine external interrupt \\ \hline
- 1 & 12--15 & {\em Reserved} \\
- 1 & $\ge$16 & {\em Designated for platform use} \\ \hline
- 0 & 0 & Instruction address misaligned \\
- 0 & 1 & Instruction access fault \\
- 0 & 2 & Illegal instruction \\
- 0 & 3 & Breakpoint \\
- 0 & 4 & Load address misaligned \\
- 0 & 5 & Load access fault \\
- 0 & 6 & Store/AMO address misaligned \\
- 0 & 7 & Store/AMO access fault \\
- 0 & 8 & Environment call from U-mode\\
- 0 & 9 & Environment call from S-mode \\
- 0 & 10 & {\em Reserved} \\
- 0 & 11 & Environment call from M-mode \\
- 0 & 12 & Instruction page fault \\
- 0 & 13 & Load page fault \\
- 0 & 14 & {\em Reserved} \\
- 0 & 15 & Store/AMO page fault \\
- 0 & 16--23 & {\em Reserved} \\
- 0 & 24--31 & {\em Designated for custom use} \\
- 0 & 32--47 & {\em Reserved} \\
- 0 & 48--63 & {\em Designated for custom use} \\
- 0 & $\ge$64 & {\em Reserved} \\
- \hline
-
-\end{tabular}
-\end{center}
-\caption{Machine cause register ({\tt mcause}) values after trap.}
-\label{mcauses}
-\end{table*}
-
-Note that load and load-reserved instructions generate load exceptions,
-whereas store, store-conditional, and AMO instructions generate store/AMO
-exceptions.
-
-\begin{commentary}
-Interrupts can be separated from other traps with a single branch on the sign of
-the {\tt mcause} register value. A shift left can remove the
-interrupt bit and scale the exception codes to index into a trap
-vector table.
-\end{commentary}
-
-\begin{commentary}
-We do not distinguish privileged instruction exceptions from illegal
-opcode exceptions. This simplifies the architecture and also hides
-details of which higher-privilege instructions are supported by an
-implementation. The privilege level servicing the trap can implement
-a policy on whether these need to be distinguished, and if so, whether
-a given opcode should be treated as illegal or privileged.
-\end{commentary}
-
-If an instruction may raise multiple synchronous exceptions, the
-decreasing priority order of Table~\ref{exception-priority}
-indicates which exception is taken and reported in {\tt mcause}.
-The priority of any custom synchronous exceptions is implementation-defined.
-
-\begin{table*}[htbp]
-\begin{center}
-\begin{tabular}{|l|r|l|}
-
- \hline
- Priority & Exc.\@ Code & Description \\
- \hline
- {\em Highest} & 3 & Instruction address breakpoint \\
- \hline
- & & During instruction address translation: \\
- & 12, 1 & \quad First encountered page fault or
- access fault \\
- \hline
- & & With physical address for instruction: \\
- & 1 & \quad Instruction access fault \\
- \hline
- & 2 & Illegal instruction \\
- & 0 & Instruction address misaligned \\
- & 8, 9, 11 & Environment call \\
- & 3 & Environment break \\
- & 3 & Load/store/AMO address breakpoint \\
- \hline
- & & Optionally: \\
- & 4, 6 & \quad Load/store/AMO address misaligned \\
- \hline
- & & During address translation for an explicit
- memory access: \\
- & 13, 15, 5, 7 & \quad First encountered page fault or
- access fault \\
- \hline
- & & With physical address for an explicit
- memory access: \\
- & 5, 7 & \quad Load/store/AMO access fault \\
- \hline
- & & If not higher priority: \\
- {\em Lowest} & 4, 6 & \quad Load/store/AMO address misaligned \\
- \hline
-
-\end{tabular}
-\end{center}
-\caption{Synchronous exception priority in decreasing priority order.}
-\label{exception-priority}
-\end{table*}
-
-When a virtual address is translated into
-a physical address, the address translation
-algorithm determines what specific exception may be raised.
-
-Load/store/AMO address-misaligned exceptions may have
-either higher or lower priority than load/store/AMO page-fault and
-access-fault exceptions.
-\begin{commentary}
-The relative priority of load/store/AMO address-misaligned and page-fault
-exceptions is implementation-defined to flexibly cater to two design points.
-Implementations that never support misaligned accesses can unconditionally
-raise the misaligned-address exception without performing address translation
-or protection checks.
-Implementations that support misaligned accesses only to some physical
-addresses must translate and check the address before determining whether the
-misaligned access may proceed, in which case raising the page-fault or
-access-fault exception is more appropriate.
-\end{commentary}
-
-\begin{commentary}
-Instruction address breakpoints have the same cause value as, but
-different priority than, data address breakpoints (a.k.a. watchpoints)
-and environment break exceptions (which are raised by the EBREAK instruction).
-\end{commentary}
-
-\begin{commentary}
-Instruction address misaligned exceptions are raised by control-flow
-instructions with misaligned targets, rather than by the act of fetching an
-instruction. Therefore, these exceptions have lower priority than other
-instruction address exceptions.
-\end{commentary}
-
-\FloatBarrier
-\subsection{Machine Trap Value Register ({\tt mtval})}
-
-The {\tt mtval} register is an MXLEN-bit read-write register formatted as shown
-in Figure~\ref{mtvalreg}. When a trap is taken into M-mode, {\tt mtval} is
-either set to zero or written with exception-specific information to assist
-software in handling the trap. Otherwise, {\tt mtval} is never written by the
-implementation, though it may be explicitly written by software. The hardware
-platform will specify which exceptions must set {\tt mtval} informatively and
-which may unconditionally set it to zero.
-If the hardware platform specifies that no exceptions set {\tt mtval} to a
-nonzero value, then {\tt mtval} is read-only zero.
-
-If {\tt mtval} is written with a nonzero value when a breakpoint,
-address-misaligned, access-fault, or page-fault exception occurs on an
-instruction fetch, load, or store, then {\tt mtval} will contain the faulting
-virtual address.
-
-\begin{commentary}
- When page-based virtual memory is enabled, {\tt mtval} is written with
- the faulting virtual address, even for physical-memory access-fault exceptions.
- This design reduces datapath cost for most implementations, particularly
- those with hardware page-table walkers.
-\end{commentary}
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt mtval} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine Trap Value register.}
-\label{mtvalreg}
-\end{figure}
-
-If {\tt mtval} is written with a nonzero value when a misaligned load or store
-causes an access-fault or page-fault exception, then {\tt mtval} will contain
-the virtual address of the portion of the access that caused the fault.
-
-If {\tt mtval} is written with a nonzero value when an instruction
-access-fault or page-fault exception occurs on a system with variable-length
-instructions, then {\tt mtval} will contain the virtual address of the portion
-of the instruction that caused the fault, while {\tt mepc} will point to the
-beginning of the instruction.
-
-The {\tt mtval} register can optionally also be used to return the faulting
-instruction bits on an illegal instruction exception ({\tt mepc} points to the
-faulting instruction in memory).
-If {\tt mtval} is written with a nonzero value when an illegal-instruction
-exception occurs, then {\tt mtval} will contain the shortest of:
-\begin{compactitem}
-\item the actual faulting instruction
-\item the first ILEN bits of the faulting instruction
-\item the first MXLEN bits of the faulting instruction
-\end{compactitem}
-The value loaded into {\tt mtval} on an illegal-instruction exception is
-right-justified and all unused upper bits are cleared to zero.
-
-\begin{commentary}
- Capturing the faulting instruction in {\tt mtval} reduces the
- overhead of instruction emulation, potentially avoiding several
- partial instruction loads if the instruction is misaligned, and
- likely data cache misses or slow uncached accesses when loads are
- used to fetch the instruction into a data register. There is also a
- problem of atomicity if another agent is manipulating the
- instruction memory, as might occur in a dynamic translation system.
-
- A requirement is that the entire instruction (or at least the first
- MXLEN bits) is fetched into {\tt mtval} before taking the trap.
- This should not constrain implementations, which would typically
- fetch the entire instruction before attempting to decode the
- instruction, and avoids complicating software handlers.
-
- A value of zero in {\tt mtval} signifies either that the feature is
- not supported, or an illegal zero instruction was fetched. A load
- from the instruction memory pointed to by {\tt mepc} can be used to
- distinguish these two cases (or alternatively, the system
- configuration information can be interrogated to install the
- appropriate trap handling before runtime).
-\end{commentary}
-
-For other traps, {\tt mtval} is set to zero, but a future standard may
-redefine {\tt mtval}'s setting for other traps.
-
-If {\tt mtval} is not read-only zero, it is a \warl\ register that must be
-able to hold all valid virtual addresses and the value zero.
-It need not be capable of holding all
-possible invalid addresses.
-Prior to writing {\tt mtval}, implementations may convert an invalid address
-into some other invalid address that {\tt mtval} is capable of holding.
-If the feature to return the faulting instruction bits is implemented, {\tt
-mtval} must also be able to hold all values less than $2^N$, where $N$ is the
-smaller of MXLEN and ILEN.
-
-\subsection{Machine Configuration Pointer Register ({\tt mconfigptr})}
-
-{\tt mconfigptr} is an MXLEN-bit read-only CSR, formatted as shown in
-Figure~\ref{mconfigptrreg}, that holds the physical address of a configuration
-data structure.
-Software can traverse this data structure to discover information about
-the harts, the platform, and their configuration.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt mconfigptr} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine Configuration Pointer register.}
-\label{mconfigptrreg}
-\end{figure}
-
-The pointer alignment in bits must be no smaller than the greatest supported
-MXLEN: i.e., if the greatest supported MXLEN is $8\times n$, then
-{\tt mconfigptr}[$\log_2n$-1:0] must be zero.
-
-{\tt mconfigptr} must be implemented, but it may be zero to
-indicate the configuration data structure does not exist or that an
-alternative mechanism must be used to locate it.
-
-\begin{commentary}
-The format and schema of the configuration data structure have yet to be standardized.
-\end{commentary}
-
-\begin{commentary}
-While {\tt mconfigptr} will simply be hardwired in some implementations, other
-implementations may provide a means to configure the value returned on CSR
-reads.
-For example, {\tt mconfigptr} might present the value of a memory-mapped
-register that is programmed by the platform or by M-mode software towards the
-beginning of the boot process.
-\end{commentary}
-
-\subsection{%
- Machine Environment Configuration Registers
- ({\tt menvcfg} and {\tt menvcfgh})%
-}
-
-The {\tt menvcfg} CSR is an MXLEN-bit read/write register,
-formatted for MXLEN=64 as shown in Figure~\ref{fig:menvcfg},
-that controls certain characteristics of the execution environment
-for modes less privileged than M.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{cc@{}Mcc@{}W@{}Wc}
-\instbit{63} &
-\instbit{62} &
-\instbitrange{61}{8} &
-\instbit{7} &
-\instbit{6} &
-\instbitrange{5}{4} &
-\instbitrange{3}{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{STCE} &
-\multicolumn{1}{c|}{PBMTE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{CBZE} &
-\multicolumn{1}{c|}{CBCFE} &
-\multicolumn{1}{c|}{CBIE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{FIOM} \\
-\hline
-1 & 1 & 54 & 1 & 1 & 2 & 3 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine environment configuration register ({\tt menvcfg}) for MXLEN=64.}
-\label{fig:menvcfg}
-\end{figure}
-
-If bit FIOM (Fence of I/O implies Memory) is set to one in {\tt menvcfg},
-FENCE instructions executed in modes less privileged than M are modified so
-the requirement to order accesses to device I/O implies also the requirement
-to order main memory accesses.
-Table~\ref{tab:menvcfg-FIOM} details the modified interpretation of
-FENCE instruction bits PI, PO, SI, and SO for modes less privileged than M
-when FIOM=1.
-
-Similarly, for modes less privileged than M when FIOM=1,
-if an atomic instruction that accesses a region ordered as device I/O
-has its {\em aq} and/or {\em rl} bit set, then that instruction is ordered
-as though it accesses both device I/O and memory.
-
-If S-mode is not supported, or if {\tt satp}.MODE is read-only zero
-(always Bare), the implementation may make FIOM read-only zero.
-
-\begin{table}[h!]
-\begin{center}
-\begin{tabular}{|c|l|}
-\hline
-Instruction bit & Meaning when set \\
-\hline
-PI & Predecessor device input and memory reads (PR implied) \\
-PO & Predecessor device output and memory writes (PW implied) \\
-\hline
-SI & Successor device input and memory reads (SR implied) \\
-SO & Successor device output and memory writes (SW implied) \\
-\hline
-\end{tabular}
-\end{center}
-\vspace{-0.1in}
-\caption{%
-Modified interpretation of FENCE predecessor and successor sets
-for modes less privileged than M when FIOM=1.%
-}
-\label{tab:menvcfg-FIOM}
-\end{table}
-
-\begin{commentary}
-Bit FIOM is needed in {\tt menvcfg} so M-mode can emulate the
-hypervisor extension of Chapter~\ref{hypervisor}, which has an
-equivalent FIOM bit in the hypervisor CSR {\tt henvcfg}.
-\end{commentary}
-
-The PBMTE bit controls whether the Svpbmt extension is available for use in
-S-mode and G-stage address translation (i.e., for page tables pointed to by
-{\tt satp} or {\tt hgatp}).
-When PBMTE=1, Svpbmt is available for S-mode and G-stage address translation.
-When PBMTE=0, the implementation behaves as though Svpbmt were not implemented.
-If Svpbmt is not implemented, PBMTE is read-only zero.
-Furthermore, for implementations with the hypervisor extension,
-{\tt henvcfg}.PBMTE is read-only zero if {\tt menvcfg}.PBMTE is zero.
-
-The definition of the STCE field will be furnished by the
-forthcoming Sstc extension.
-Its allocation within {\tt menvcfg} may change prior to the ratification
-of that extension.
-
-The definition of the CBZE field will be furnished by the
-forthcoming Zicboz extension.
-Its allocation within {\tt menvcfg} may change prior to the ratification
-of that extension.
-
-The definitions of the CBCFE and CBIE fields will be furnished by the
-forthcoming Zicbom extension.
-Their allocations within {\tt menvcfg} may change prior to the ratification
-of that extension.
-
-When MXLEN=32, {\tt menvcfg} contains the same fields as bits 31:0
-of {\tt menvcfg} when MXLEN=64.
-Additionally, when MXLEN=32, {\tt menvcfgh} is a 32-bit read/write register that
-contains the same fields as bits 63:32 of {\tt menvcfg} when
-MXLEN=64.
-Register {\tt menvcfgh} does not exist when MXLEN=64.
-
-If U-mode is not supported, then registers {\tt menvcfg} and {\tt menvcfgh} do
-not exist.
-
-\subsection{Machine Security Configuration Register ({\tt mseccfg})}
-\label{sec:mseccfg}
-
-{\tt mseccfg} is an optional MXLEN-bit read/write register, formatted as shown
-in Figure~\ref{fig:mseccfg}, that controls security features.
-
-When MXLEN=32 only, {\tt mseccfgh} is a 32-bit read/write register that
-contains the same fields as {\tt mseccfg} bits 63:32 when MXLEN=64.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{MccFccc}
-\instbitrange{XLEN-1}{10} &
-\instbit{9} &
-\instbit{8} &
-\instbitrange{7}{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{\wpri} &
-\multicolumn{1}{c|}{SSEED} &
-\multicolumn{1}{c|}{USEED} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{RLB} &
-\multicolumn{1}{c|}{MMWP} &
-\multicolumn{1}{c|}{MML} \\
-\hline
-XLEN-10 & 1 & 1 & 5 & 1 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Machine security configuration register ({\tt mseccfg}).}
-\label{fig:mseccfg}
-\end{figure*}
-
-The definitions of the SSEED and USEED fields will be furnished by the
-forthcoming entropy-source extension, Zkr.
-Their allocations within {\tt mseccfg} may change prior to the ratification
-of that extension.
-
-The definitions of the RLB, MMWP, and MML fields will be furnished by the
-forthcoming PMP-enhancement extension, Smepmp.
-Their allocations within {\tt mseccfg} may change prior to the ratification
-of that extension.
-
-\section{Machine-Level Memory-Mapped Registers}
-
-\subsection{Machine Timer Registers ({\tt mtime} and {\tt mtimecmp})}
-
-Platforms provide a real-time counter, exposed as a memory-mapped
-machine-mode read-write register, {\tt mtime}. {\tt mtime} must
-increment at constant frequency, and the platform must provide a
-mechanism for determining the period of an {\tt mtime} tick. The {\tt
- mtime} register will wrap around if the count overflows.
-
-The {\tt mtime} register has a 64-bit precision on all RV32 and RV64
-systems. Platforms provide a 64-bit memory-mapped machine-mode
-timer compare register ({\tt mtimecmp}).
-A machine timer interrupt becomes pending whenever {\tt mtime} contains
-a value greater than or equal to {\tt mtimecmp}, treating the values
-as unsigned integers.
-The interrupt remains posted until {\tt mtimecmp} becomes greater than
-{\tt mtime} (typically as a result of writing {\tt mtimecmp}).
-The interrupt will only be taken if interrupts
-are enabled and the MTIE bit is set in the {\tt mie} register.
-
-\begin{figure}[h!]
- {\footnotesize
- \begin{center}
- \begin{tabular}{@{}J}
- \instbitrange{63}{0} \\
- \hline
- \multicolumn{1}{|c|}{\tt mtime} \\
- \hline
- 64 \\
- \end{tabular}
- \end{center}
- }
- \vspace{-0.1in}
- \caption{Machine time register (memory-mapped control register).}
-\end{figure}
-
-\begin{figure}[h!]
- {\footnotesize
- \begin{center}
- \begin{tabular}{@{}J}
- \instbitrange{63}{0} \\
- \hline
- \multicolumn{1}{|c|}{\tt mtimecmp} \\
- \hline
- 64 \\
- \end{tabular}
- \end{center}
- }
- \vspace{-0.1in}
- \caption{Machine time compare register (memory-mapped control register).}
-\end{figure}
-
-\begin{commentary}
- The timer facility is defined to use wall-clock time rather than a
- cycle counter to support modern processors that run with a highly
- variable clock frequency to save energy through dynamic voltage and
- frequency scaling.
-
- Accurate real-time clocks (RTCs) are relatively expensive to provide
- (requiring a crystal or MEMS oscillator) and have to run even when the
- rest of the system is powered down, and so there is usually only one in a
- system located in a different frequency/voltage domain from the
- processors. Hence, the RTC must be shared by all the harts in a
- system and accesses to the RTC will potentially incur the penalty of a
- voltage-level-shifter and clock-domain crossing. It is thus more
- natural to expose {\tt mtime} as a memory-mapped register than as a CSR.
-
- Lower privilege levels do not have their own {\tt timecmp} registers.
- Instead, machine-mode software can implement any number of virtual timers on
- a hart by multiplexing the next timer interrupt into the {\tt mtimecmp}
- register.
-
- Simple fixed-frequency systems can use a single clock for both cycle
- counting and wall-clock time.
-\end{commentary}
-
-Writes to {\tt mtime} and {\tt mtimecmp} are guaranteed to be reflected in
-MTIP eventually, but not necessarily immediately.
-
-\begin{commentary}
- A spurious timer interrupt might occur if an interrupt handler increments {\tt
- mtimecmp} then immediately returns, because MTIP might not yet have fallen in
- the interim. All software should be written to assume this event is possible,
- but most software should assume this event is extremely unlikely. It is
- almost always more performant to incur an occasional spurious timer interrupt
- than to poll MTIP until it falls.
-\end{commentary}
-
-In RV32, memory-mapped writes to {\tt mtimecmp} modify only one 32-bit
-part of the register. The following code sequence sets a 64-bit {\tt
- mtimecmp} value without spuriously generating a timer interrupt due
-to the intermediate value of the comparand:
-
-\begin{figure}[h!]
- \begin{center}
- \begin{verbatim}
- # New comparand is in a1:a0.
- li t0, -1
- la t1, mtimecmp
- sw t0, 0(t1) # No smaller than old value.
- sw a1, 4(t1) # No smaller than new value.
- sw a0, 0(t1) # New value.
- \end{verbatim}
- \end{center}
- \caption{Sample code for setting the 64-bit time comparand in RV32, assuming
- a little-endian memory system and that the registers live in a strongly
- ordered I/O region. Storing -1 to the low-order bits of {\tt mtimecmp}
- prevents {\tt mtimecmp} from temporarily becoming smaller than the lesser
- of the old and new values.}
- \label{mtimecmph}
-\end{figure}
-
-For RV64, naturally aligned 64-bit memory accesses to the {\tt mtime} and {\tt
- mtimecmp} registers are additionally supported and are atomic.
-
-\section{Machine-Mode Privileged Instructions}
-
-\subsection{Environment Call and Breakpoint}
-
-\vspace{-0.2in}
-\begin{center}
-\begin{tabular}{M@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{funct12} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-12 & 5 & 3 & 5 & 7 \\
-ECALL & 0 & PRIV & 0 & SYSTEM \\
-EBREAK & 0 & PRIV & 0 & SYSTEM \\
-\end{tabular}
-\end{center}
-
-The ECALL instruction is used to make a request to the supporting execution
-environment. When executed in U-mode, S-mode, or M-mode, it generates an
-environment-call-from-U-mode exception, environment-call-from-S-mode exception, or environment-call-from-M-mode exception, respectively, and performs no other operation.
-\begin{commentary}
-ECALL generates a different exception for each originating privilege mode
-so that environment call exceptions can be selectively delegated. A typical
-use case for Unix-like operating systems is to delegate to S-mode the
-environment-call-from-U-mode exception but not the others.
-\end{commentary}
-
-The EBREAK instruction is used by debuggers to cause control to be transferred
-back to a debugging environment. It generates a breakpoint exception and
-performs no other operation.
-\begin{commentary}
-As described in the ``C'' Standard Extension for Compressed Instructions in
-Volume I of this manual, the C.EBREAK instruction performs the same operation
-as the EBREAK instruction.
-\end{commentary}
-
-ECALL and EBREAK cause the receiving privilege mode's {\tt epc} register
-to be set to the address of the ECALL or EBREAK instruction itself, {\em not}
-the address of the following instruction.
-As ECALL and EBREAK cause synchronous exceptions, they are not considered to
-retire, and should not increment the {\tt minstret} CSR.
-
-\subsection{Trap-Return Instructions}
-\label{otherpriv}
-
-Instructions to return from trap are encoded under the PRIV
-minor opcode.
-
-\vspace{-0.2in}
-\begin{center}
-\begin{tabular}{M@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{funct12} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-12 & 5 & 3 & 5 & 7 \\
-MRET/SRET & 0 & PRIV & 0 & SYSTEM \\
-\end{tabular}
-\end{center}
-
-To return after handling a trap, there are separate trap return
-instructions per privilege level, MRET and SRET. MRET is
-always provided. SRET must be provided if supervisor mode is
-supported, and should raise an illegal instruction exception otherwise. SRET
-should also raise an illegal instruction exception when TSR=1 in {\tt mstatus},
-as described in Section~\ref{virt-control}.
-An {\em x}\/RET instruction can be executed in privilege mode {\em x}
-or higher, where executing a lower-privilege {\em x}\/RET instruction
-will pop the relevant lower-privilege interrupt enable and privilege
-mode stack. In addition to manipulating the privilege stack as
-described in Section~\ref{privstack}, {\em x}\/RET sets the {\tt pc}
-to the value stored in the {\em x}\/{\tt epc} register.
-
-If the A extension is supported, the {\em x}\/RET instruction is
-allowed to clear any outstanding LR address reservation but is not
-required to. Trap handlers should explicitly clear the reservation if
-required (e.g., by using a dummy SC) before executing the {\em x}\/RET.
-
-\begin{commentary}
- If {\em x}\/RET instructions always cleared LR reservations, it would
- be impossible to single-step through LR/SC sequences using a
- debugger.
-\end{commentary}
-
-\subsection{Wait for Interrupt}
-\label{wfi}
-
-The Wait for Interrupt instruction (WFI) provides a hint to the
-implementation that the current hart can be stalled until an interrupt
-might need servicing. Execution of the WFI instruction can also be
-used to inform the hardware platform that suitable interrupts should
-preferentially be routed to this hart. WFI is available in all
-privileged modes, and optionally available to U-mode. This instruction may
-raise an illegal instruction exception when TW=1 in {\tt mstatus}, as described
-in Section~\ref{virt-control}.
-
-\vspace{-0.2in}
-\begin{center}
-\begin{tabular}{M@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{funct12} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-12 & 5 & 3 & 5 & 7 \\
-WFI & 0 & PRIV & 0 & SYSTEM \\
-\end{tabular}
-\end{center}
-
-If an enabled interrupt is present or later becomes present while the
-hart is stalled, the interrupt trap will be taken on the
-following instruction, i.e., execution resumes in the trap handler and
-{\tt mepc} = {\tt pc} + 4.
-
-\begin{commentary}
-The following instruction takes the interrupt trap so
-that a simple return from the trap handler will execute code after the
-WFI instruction.
-\end{commentary}
-
-The purpose of the WFI instruction is to provide a hint to the implementation,
-and so a legal implementation is to simply implement WFI as a NOP.
-
-\begin{commentary}
-If the implementation does not stall the hart on execution of the
-instruction, then the interrupt will be taken on some instruction in
-the idle loop containing the WFI, and on a simple return from the
-handler, the idle loop will resume execution.
-\end{commentary}
-
-The WFI instruction can also be executed when interrupts are disabled. The
-operation of WFI must be unaffected by the global interrupt bits in {\tt
-mstatus} (MIE and SIE) and the delegation register {\tt mideleg}
-(i.e., the hart must resume if a locally enabled interrupt becomes pending,
-even if it has been delegated to a less-privileged mode), but should honor the
-individual interrupt enables (e.g., MTIE) (i.e., implementations should
-avoid resuming the hart if the interrupt is pending but not
-individually enabled). WFI is also required to resume execution for
-locally enabled interrupts pending at any privilege level, regardless
-of the global interrupt enable at each privilege level.
-
-If the event that causes the hart to resume execution does not cause
-an interrupt to be taken, execution will resume at {\tt pc} + 4, and
-software must determine what action to take, including looping back to
-repeat the WFI if there was no actionable event.
-
-\begin{commentary}
-By allowing wakeup when interrupts are disabled, an alternate entry
-point to an interrupt handler can be called that does not require
-saving the current context, as the current context can be saved or
-discarded before the WFI is executed.
-
-As implementations are free to implement WFI as a NOP, software must
-explicitly check for any relevant pending but disabled interrupts in
-the code following a WFI, and should loop back to the WFI if no
-suitable interrupt was detected. The {\tt mip} or {\tt sip}
-registers can be interrogated to determine the presence
-of any interrupt in machine or supervisor mode
-respectively.
-
-The operation of WFI is unaffected by the delegation register settings.
-
-WFI is defined so that an implementation can trap into a higher
-privilege mode, either immediately on encountering the WFI or after
-some interval to initiate a machine-mode transition to a lower power
-state, for example.
-\end{commentary}
-
-\begin{commentary}
-The same ``wait-for-event'' template might be used for possible future
-extensions that wait on memory locations changing, or message
-arrival.
-\end{commentary}
-
-\subsection{Custom SYSTEM Instructions}
-\label{sec:customsys}
-
-The subspace of the SYSTEM major opcode shown in Figure~\ref{fig:customsys}
-is designated for custom use.
-It is recommended that these instructions use bits 29:28 to designate the
-minimum required privilege mode, as do other SYSTEM instructions.
-
-\begin{figure}[h!]
-\begin{center}
-\begin{tabular}{Y@{}S@{}F@{}Y@{}Rc}
-\\
-\instbitrange{31}{26} &
-\instbitrange{25}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\cline{1-5}
-\multicolumn{1}{|c|}{funct6} &
-\multicolumn{1}{c|}{\em custom} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{\em custom} &
-\multicolumn{1}{c|}{opcode} &
-Recommended Purpose \\
-\cline{1-5}
-6 & 11 & 3 & 5 & 7 \\
-100011 & {\em custom} & 0 & {\em custom} & SYSTEM & Unprivileged or User-Level \\
-110011 & {\em custom} & 0 & {\em custom} & SYSTEM & Unprivileged or User-Level \\
-100111 & {\em custom} & 0 & {\em custom} & SYSTEM & Supervisor-Level \\
-110111 & {\em custom} & 0 & {\em custom} & SYSTEM & Supervisor-Level \\
-101011 & {\em custom} & 0 & {\em custom} & SYSTEM & Hypervisor-Level \\
-111011 & {\em custom} & 0 & {\em custom} & SYSTEM & Hypervisor-Level \\
-101111 & {\em custom} & 0 & {\em custom} & SYSTEM & Machine-Level \\
-111111 & {\em custom} & 0 & {\em custom} & SYSTEM & Machine-Level \\
-\end{tabular}
-\end{center}
-\caption{SYSTEM instruction encodings designated for custom use.}
-\label{fig:customsys}
-\end{figure}
-
-\section{Reset}
-\label{sec:reset}
-
-Upon reset, a hart's privilege mode is set to M. The {\tt mstatus} fields MIE
-and MPRV are reset to 0.
-If little-endian memory accesses are supported, the {\tt mstatus}/{\tt mstatush}
-field MBE is reset to 0.
-The {\tt misa} register is reset to enable the maximal set of supported
-extensions and widest MXLEN, as described in Section~\ref{sec:misa}.
-For implementations with the ``A'' standard extension, there is no valid load
-reservation.
-The {\tt pc} is set to an implementation-defined
-reset vector. The {\tt mcause} register is set to a value indicating the
-cause of the reset.
-Writable PMP registers' A and L fields are set to 0, unless the platform
-mandates a different reset value for some PMP registers' A and L fields.
-If the hypervisor extension is implemented, the {\tt hgatp}.MODE and
-{\tt vsatp}.MODE fields are reset to 0.
-If the Smrnmi extension is implemented, the {\tt mnstatus}.NMIE field
-is reset to 0.
-No \warl\ field contains an illegal value.
-All other hart state is \unspecified.
-
-The {\tt mcause} values after reset have implementation-specific
-interpretation, but the value 0 should be returned on implementations
-that do not distinguish different reset conditions. Implementations
-that distinguish different reset conditions should only use 0 to
-indicate the most complete reset.
-
-\begin{commentary}
-Some designs may have multiple causes of reset (e.g., power-on reset,
-external hard reset, brownout detected, watchdog timer elapse,
-sleep-mode wakeup), which machine-mode software and debuggers may wish
-to distinguish.
-
-{\tt mcause} reset values may alias {\tt mcause} values following synchronous
-exceptions. There should be no ambiguity in this overlap, since on reset the
-{\tt pc} is typically set to a different value than on other traps.
-\end{commentary}
-
-\section{Non-Maskable Interrupts}
-\label{sec:nmi}
-
-Non-maskable interrupts (NMIs) are only used for hardware error
-conditions, and cause an immediate jump to an implementation-defined
-NMI vector running in M-mode regardless of the state of a hart's
-interrupt enable bits. The {\tt mepc} register is written with the
-virtual address of the instruction that was interrupted,
-and {\tt mcause} is set to a value indicating the source of the
-NMI. The NMI can thus overwrite state in an active machine-mode
-interrupt handler.
-
-The values written to {\tt mcause} on an NMI are
-implementation-defined. The high Interrupt bit of {\tt mcause} should
-be set to indicate that this was an interrupt. An Exception Code of 0
-is reserved to mean ``unknown cause'' and implementations that do not
-distinguish sources of NMIs via the {\tt mcause} register should
-return 0 in the Exception Code.
-
-Unlike resets, NMIs do not reset processor state, enabling diagnosis,
-reporting, and possible containment of the hardware error.
-
-\section{Physical Memory Attributes}
-\label{sec:pma}
-
-The physical memory map for a complete system includes various address
-ranges, some corresponding to memory regions, some to memory-mapped
-control registers, and some to vacant holes in the address space. Some
-memory regions might not support reads, writes, or execution; some
-might not support subword or subblock accesses; some might not support
-atomic operations; and some might not support cache coherence or might
-have different memory models. Similarly, memory-mapped control
-registers vary in their supported access widths, support for atomic
-operations, and whether read and write accesses have associated side
-effects. In RISC-V systems, these properties and capabilities of each
-region of the machine's physical address space are termed {\em
- physical memory attributes} (PMAs). This section describes RISC-V
-PMA terminology and how RISC-V systems implement and check PMAs.
-
-PMAs are inherent properties of the underlying hardware and rarely
-change during system operation. Unlike physical memory protection
-values described in Section~\ref{sec:pmp}, PMAs do not vary by
-execution context. The PMAs of some memory regions are fixed at chip
-design time---for example, for an on-chip ROM. Others are fixed at
-board design time, depending, for example, on which other chips are
-connected to off-chip buses. Off-chip buses might also support
-devices that could be changed on every power cycle (cold pluggable) or
-dynamically while the system is running (hot pluggable). Some devices
-might be configurable at run time to support different uses that imply
-different PMAs---for example, an on-chip scratchpad RAM might be
-cached privately by one core in one end-application, or accessed as a
-shared non-cached memory in another end-application.
-
-Most systems will require that at least some PMAs are dynamically
-checked in hardware later in the execution pipeline after the physical
-address is known, as some operations will not be supported at all
-physical memory addresses, and some operations require knowing the
-current setting of a configurable PMA attribute. While many other architectures
-specify some PMAs in the virtual memory page tables and use the TLB to
-inform the pipeline of these properties, this approach injects platform-specific
-information into a virtualized layer and can cause system errors
-unless attributes are correctly initialized in each page-table entry
-for each physical memory region. In addition, the available
-page sizes might not be optimal for specifying attributes in the
-physical memory space, leading to address-space fragmentation and
-inefficient use of expensive TLB entries.
-
-For RISC-V, we separate out specification and checking of PMAs into a
-separate hardware structure, the {\em PMA checker}. In many cases,
-the attributes are known at system design time for each physical
-address region, and can be hardwired into the PMA checker. Where the
-attributes are run-time configurable, platform-specific memory-mapped
-control registers can be provided to specify these attributes at a
-granularity appropriate to each region on the platform (e.g., for an
-on-chip SRAM that can be flexibly divided between cacheable and
-uncacheable uses). PMAs are checked for any access to physical
-memory, including accesses that have undergone virtual to physical
-memory translation. To aid in system debugging, we strongly recommend
-that, where possible, RISC-V processors precisely trap physical memory
-accesses that fail PMA checks. Precisely trapped PMA violations manifest
-as instruction, load, or store access-fault exceptions, distinct from
-virtual-memory page-fault exceptions. Precise PMA traps might not always be
-possible, for example, when probing a legacy bus architecture that
-uses access failures as part of the discovery mechanism. In this
-case, error responses from peripheral devices will be reported as imprecise
-bus-error interrupts.
-
-PMAs must also be readable by software to correctly access certain
-devices or to correctly configure other hardware components that
-access memory, such as DMA engines. As PMAs are tightly tied to a
-given physical platform's organization, many details are inherently
-platform-specific, as is the means by which software can learn the PMA
-values for a platform. Some
-devices, particularly legacy buses, do not support discovery of PMAs
-and so will give error responses or time out if an unsupported access
-is attempted. Typically, platform-specific machine-mode code will
-extract PMAs and ultimately present this information to higher-level
-less-privileged software using some standard representation.
-
-Where platforms support dynamic reconfiguration of PMAs, an interface
-will be provided to set the attributes by passing requests to a
-machine-mode driver that can correctly reconfigure the platform. For
-example, switching cacheability attributes on some memory regions
-might involve platform-specific operations, such as cache flushes,
-that are available only to machine-mode.
-
-\subsection{Main Memory versus I/O versus Vacant Regions}
-
-The most important characterization of a given memory address range is
-whether it holds regular main memory, or I/O devices, or is vacant.
-Regular main memory is required to have a number of properties,
-specified below, whereas I/O devices can have a much broader range of
-attributes. Memory regions that do not fit into regular main
-memory, for example, device scratchpad RAMs, are categorized as I/O
-regions. Vacant regions are also classified as I/O regions but with
-attributes specifying that no accesses are supported.
-
-\subsection{Supported Access Type PMAs}
-
-Access types specify which access widths, from 8-bit byte to long
-multi-word burst, are supported, and also whether misaligned accesses
-are supported for each access width.
-
-\begin{commentary}
-Although software running on a RISC-V hart cannot directly generate
-bursts to memory, software might have to program DMA engines to access
-I/O devices and might therefore need to know which access sizes are
-supported.
-\end{commentary}
-
-Main memory regions always support read and write of all
-access widths required by the attached devices, and can
-specify whether instruction fetch is supported.
-
-\begin{commentary}
-Some platforms might mandate that all of main memory support instruction
-fetch.
-Other platforms might prohibit instruction fetch from some main memory
-regions.
-\end{commentary}
-
-\begin{commentary}
-In some cases, the design of a processor or device accessing main
-memory might support other widths, but must be able to function with
-the types supported by the main memory.
-\end{commentary}
-
-I/O regions can specify which combinations of read, write, or execute
-accesses to which data widths are supported.
-
-For systems with page-based virtual memory, I/O and memory regions can
-specify which combinations of hardware page-table reads and hardware
-page-table writes are supported.
-
-\begin{commentary}
-Unix-like operating systems generally require that all of cacheable main
-memory supports page-table walks.
-\end{commentary}
-
-\subsection{Atomicity PMAs}
-
-Atomicity PMAs describe which atomic instructions are supported in
-this address region.
-Support for atomic instructions is divided into two
-categories: {\em LR/SC} and {\em AMOs}.
-
-\begin{commentary}
-Some platforms might mandate that all of cacheable main memory support
-all atomic operations required by the attached processors.
-\end{commentary}
-
-\subsubsection{AMO PMA}
-
- Within AMOs, there are four levels of
-support: {\em AMONone}, {\em AMOSwap}, {\em AMOLogical}, and {\em
- AMOArithmetic}. AMONone indicates that no AMO operations are
-supported. AMOSwap indicates that only {\tt amoswap} instructions are
-supported in this address range. AMOLogical indicates that swap
-instructions plus all the logical AMOs ({\tt amoand}, {\tt amoor},
-{\tt amoxor}) are supported. AMOArithmetic indicates that all RISC-V
-AMOs are supported. For each level of support, naturally aligned AMOs
-of a given width are supported if the underlying memory region
-supports reads and writes of that width.
-Main memory and I/O regions may only support a subset or none of the
-processor-supported atomic operations.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|l|l|}
- \hline
- AMO Class & Supported Operations \\
- \hline
- AMONone & {\em None} \\
- AMOSwap & {\tt amoswap} \\
- AMOLogical & above + {\tt amoand}, {\tt amoor}, {\tt amoxor} \\
- AMOArithmetic & above + {\tt amoadd}, {\tt amomin}, {\tt amomax}, {\tt amominu}, {\tt amomaxu} \\
- \hline
-\end{tabular}
-\end{center}
-\caption{Classes of AMOs supported by I/O regions.}
-\label{amoclasses}
-\end{table*}
-
-\begin{commentary}
-We recommend providing at least AMOLogical support for I/O regions
-where possible.
-\end{commentary}
-
-\subsubsection{Reservability PMA}
-
-For {\em LR/SC}, there are three levels of support indicating combinations of
-the reservability and eventuality properties: {\em RsrvNone},
-{\em RsrvNonEventual}, and {\em RsrvEventual}.
-RsrvNone indicates that no LR/SC operations are supported (the location is
-non-reservable). RsrvNonEventual indicates that the operations are supported
-(the location is reservable), but without the eventual success guarantee
-described in the unprivileged ISA specification. RsrvEventual indicates that
-the operations are supported and provide the eventual success guarantee.
-
-\begin{commentary}
-We recommend providing RsrvEventual support for main memory regions
-where possible. Most I/O regions will not support LR/SC accesses, as
-these are most conveniently built on top of a cache-coherence scheme, but some
-may support RsrvNonEventual or RsrvEventual.
-\end{commentary}
-
-\begin{commentary}
-When LR/SC is used for memory locations marked RsrvNonEventual, software should
-provide alternative fall-back mechanisms used when lack of progress is
-detected.
-\end{commentary}
-
-\subsubsection{Alignment}
-
-Memory regions that support aligned LR/SC or aligned AMOs might also support
-misaligned LR/SC or misaligned AMOs for some addresses and access widths. If,
-for a given address and access width, a misaligned LR/SC or AMO generates
-an address-misaligned exception, then {\em all} loads, stores, LRs/SCs, and
-AMOs using that address and access width must generate address-misaligned
-exceptions.
-\begin{commentary}
-The standard ``A'' extension does not support misaligned AMOs or LR/SC pairs.
-Support for misaligned AMOs is provided by the standard ``Zam'' extension.
-Support for misaligned LR/SC sequences is not currently standardized,
-so LR and SC to misaligned addresses must raise an exception.
-
-Mandating that misaligned loads and stores raise address-misaligned exceptions
-wherever misaligned AMOs raise address-misaligned exceptions permits the
-emulation of misaligned AMOs in an M-mode trap handler. The handler
-guarantees atomicity by acquiring a global mutex and emulating the access
-within the critical section. Provided that the handler for misaligned loads
-and stores uses the same mutex, all accesses to a given address that use the
-same word size will be mutually atomic.
-\end{commentary}
-
-Implementations may raise access-fault exceptions instead of address-misaligned
-exceptions for some misaligned accesses, indicating the instruction should not
-be emulated by a trap handler. If, for a given address and access width, all
-misaligned LRs/SCs and AMOs generate access-fault exceptions, then regular
-misaligned loads and stores using the same address and access width are not
-required to execute atomically.
-
-\subsection{Memory-Ordering PMAs}
-
-Regions of the address space are classified as either {\em main
- memory} or {\em I/O} for the purposes of ordering by the FENCE
-instruction and atomic-instruction ordering bits.
-
-Accesses by one hart to main memory regions are observable not only by
-other harts but also by other devices with the capability to initiate
-requests in the main memory system (e.g., DMA engines).
-Coherent main memory regions always have either the RVWMO or RVTSO memory
-model.
-Incoherent main memory regions have an implementation-defined memory model.
-
-Accesses by one hart to an I/O region are observable not only by other harts
-and bus mastering devices but also by the targeted I/O devices, and I/O
-regions may be accessed with either {\em relaxed} or {\em strong} ordering.
-Accesses to an I/O region with relaxed ordering are generally observed by
-other harts and bus mastering devices in a manner similar to the ordering of
-accesses to an RVWMO memory region, as discussed in Section A.4.2 in Volume
-I of this specification.
-By contrast, accesses to an I/O region with strong ordering are generally
-observed by other harts and bus mastering devices in program order.
-
-Each strongly ordered I/O region specifies a numbered ordering
-channel, which is a mechanism by which ordering guarantees can be
-provided between different I/O regions. Channel 0 is used to indicate
-point-to-point strong ordering only, where only accesses by the hart to the
-single associated I/O region are strongly ordered.
-
-Channel 1 is used to provide global strong ordering across all I/O
-regions. Any accesses by a hart to any I/O region associated with
-channel 1 can only be observed to have occurred in program order by all
-other harts and I/O devices, including relative to accesses made by
-that hart to relaxed I/O regions or strongly ordered I/O regions with
-different channel numbers. In other words, any access to a region in
-channel 1 is equivalent to executing a {\tt fence io,io}
-instruction before and after the instruction.
-
-Other larger channel numbers provide program ordering to accesses by
-that hart across any regions with the same channel number.
-
-Systems might support dynamic configuration of ordering properties on
-each memory region.
-
-\begin{commentary}
-Strong ordering can be used to improve compatibility with legacy
-device driver code, or to enable increased performance compared to
-insertion of explicit ordering instructions when the implementation is
-known to not reorder accesses.
-
-Local strong ordering (channel 0) is the default form of strong
-ordering as it is often straightforward to provide if there is only a
-single in-order communication path between the hart and the I/O
-device.
-
-Generally, different strongly ordered I/O regions can share the same
-ordering channel without additional ordering hardware if they share
-the same interconnect path and the path does not reorder requests.
-\end{commentary}
-
-\subsection{Coherence and Cacheability PMAs}
-
-Coherence is a property defined for a single physical address, and
-indicates that writes to that address by one agent will eventually be
-made visible to other agents in the system. Coherence is not to be
-confused with the memory consistency model of a system, which defines
-what values a memory read can return given the previous history of
-reads and writes to the entire memory system. In RISC-V platforms,
-the use of hardware-incoherent regions is discouraged due to software
-complexity, performance, and energy impacts.
-
-The cacheability of a memory region should not affect the software
-view of the region except for differences reflected in other PMAs,
-such as main memory versus I/O classification, memory ordering,
-supported accesses and atomic operations, and coherence. For this
-reason, we treat cacheability as a platform-level setting managed by
-machine-mode software only.
-
-Where a platform supports configurable cacheability settings for a
-memory region, a platform-specific machine-mode routine will change
-the settings and flush caches if necessary, so the system is only
-incoherent during the transition between cacheability settings. This
-transitory state should not be visible to lower privilege levels.
-
-\begin{commentary}
-Coherence is straightforward to provide for a shared memory region
-that is not cached by any agent. The PMA for such a region would
-simply indicate it should not be cached in a private or shared cache.
-
-Coherence is also straightforward for read-only regions, which can be
-safely cached by multiple agents without requiring a cache-coherence
-scheme. The PMA for this region would indicate that it can be cached,
-but that writes are not supported.
-
-Some read-write regions might only be accessed by a single agent, in
-which case they can be cached privately by that agent without
-requiring a coherence scheme. The PMA for such regions would indicate
-they can be cached. The data can also be cached in a shared cache, as
-other agents should not access the region.
-
-If an agent can cache a read-write region that is accessible by other
-agents, whether caching or non-caching, a cache-coherence scheme is
-required to avoid use of stale values. In regions lacking hardware
-cache coherence (hardware-incoherent regions), cache coherence can be
-implemented entirely in software, but software coherence schemes are
-notoriously difficult to implement correctly and often have severe
-performance impacts due to the need for conservative software-directed
-cache-flushing. Hardware cache-coherence schemes require more complex
-hardware and can impact performance due to the cache-coherence probes,
-but are otherwise invisible to software.
-
-For each hardware cache-coherent region, the PMA would indicate that
-the region is coherent and which hardware coherence controller to use
-if the system has multiple coherence controllers. For some systems,
-the coherence controller might be an outer-level shared cache, which
-might itself access further outer-level cache-coherence controllers
-hierarchically.
-
-Most memory regions within a platform will be coherent to software,
-because they will be fixed as either uncached, read-only, hardware
-cache-coherent, or only accessed by one agent.
-\end{commentary}
-
-If a PMA indicates non-cacheability, then accesses to that region must
-be satisfied by the memory itself, not by any caches.
-
-\begin{commentary}
-For implementations with a cacheability-control mechanism, the situation
-may arise that a program uncacheably accesses a memory location that is
-currently cache-resident.
-In this situation, the cached copy must be ignored.
-This constraint is necessary to prevent more-privileged modes' speculative
-cache refills from affecting the behavior of less-privileged modes'
-uncacheable accesses.
-\end{commentary}
-
-\subsection{Idempotency PMAs}
-
-Idempotency PMAs describe whether reads and writes to an address
-region are idempotent. Main memory regions are assumed to be
-idempotent. For I/O regions, idempotency on reads and writes can be
-specified separately (e.g., reads are idempotent but writes are not).
-If accesses are non-idempotent, i.e., there is potentially a side
-effect on any read or write access, then speculative or redundant
-accesses must be avoided.
-
-For the purposes of defining the idempotency PMAs, changes in observed
-memory ordering created by redundant accesses are not considered a
-side effect.
-
-\begin{commentary}
-While hardware should always be designed to avoid speculative or
-redundant accesses to memory regions marked as non-idempotent, it is
-also necessary to ensure software or compiler optimizations do not
-generate spurious accesses to non-idempotent memory regions.
-\end{commentary}
-
-\begin{commentary}
-Non-idempotent regions might not support misaligned accesses. Misaligned
-accesses to such regions should raise access-fault exceptions rather than
-address-misaligned exceptions, indicating that software should not emulate the
-misaligned access using multiple smaller accesses, which could cause
-unexpected side effects.
-\end{commentary}
-
-For non-idempotent regions, implicit reads and writes must not be performed
-early or speculatively, with the following exceptions.
-When a non-speculative implicit read is performed, an implementation is
-permitted to additionally read any of the bytes within a naturally aligned
-power-of-2 region containing the address of the non-speculative implicit read.
-Furthermore, when a non-speculative instruction fetch is performed, an
-implementation is permitted to additionally read any of the bytes within the
-{\em next} naturally aligned power-of-2 region of the same size (with the
-address of the region taken modulo $2^{\text{XLEN}}$).
-The results of these additional reads may be used to satisfy subsequent early
-or speculative implicit reads.
-The size of these naturally aligned power-of-2 regions is
-implementation-defined, but, for systems with page-based virtual memory, must
-not exceed the smallest supported page size.
-
-\section{Physical Memory Protection}
-\label{sec:pmp}
-
-To support secure processing and contain faults, it is desirable to
-limit the physical addresses accessible by software running on a hart.
-An optional physical memory protection (PMP) unit provides
-per-hart machine-mode control registers to allow
-physical memory access privileges (read, write, execute) to be
-specified for each physical memory region. The PMP values are checked
-in parallel with the PMA checks described in Section~\ref{sec:pma}.
-
-The granularity of PMP access control settings is platform-specific,
-but the standard PMP
-encoding supports regions as small as four bytes. Certain regions' privileges
-can be hardwired---for example, some regions might only ever be visible in
-machine mode but in no lower-privilege layers.
-
-\begin{commentary}
-Platforms vary widely in demands for physical memory protection, and
-some platforms may provide other PMP structures in addition to or
-instead of the scheme described in this section.
-\end{commentary}
-
-PMP checks are applied to all accesses whose effective privilege mode is S or
-U, including instruction fetches and data accesses in S and U mode, and data
-accesses in M-mode when the MPRV bit in {\tt mstatus} is set and the MPP
-field in {\tt mstatus} contains S or U.
-PMP checks are also applied to page-table
-accesses for virtual-address translation, for which the effective
-privilege mode is S. Optionally, PMP checks may additionally apply
-to M-mode accesses, in which case the PMP registers themselves are
-locked, so that even M-mode software cannot change them until the hart is
-reset. In effect, PMP can {\em grant} permissions to S and U
-modes, which by default have none, and can {\em revoke} permissions
-from M-mode, which by default has full permissions.
-
-PMP violations are always trapped precisely at the processor.
-
-\subsection{Physical Memory Protection CSRs}
-
-PMP entries are described by an 8-bit configuration register and one MXLEN-bit
-address register. Some PMP settings additionally use the address register
-associated with the preceding PMP entry.
-Up to 64 PMP entries are supported.
-Implementations may implement zero, 16, or 64 PMP entries; the lowest-numbered
-PMP entries must be implemented first.
-All PMP CSR fields are \warl\ and may be read-only zero.
-PMP CSRs are only accessible to M-mode.
-
-The PMP configuration registers are densely packed into CSRs to minimize
-context-switch time.
-For RV32, sixteen CSRs, {\tt pmpcfg0}--{\tt pmpcfg15}, hold the configurations
-{\tt pmp0cfg}--{\tt pmp63cfg} for the 64 PMP entries, as shown in
-Figure~\ref{pmpcfg-rv32}.
-For RV64, eight even-numbered CSRs,
-{\tt pmpcfg0}, {\tt pmpcfg2}, \ldots, {\tt pmpcfg14}, hold the configurations
-for the 64 PMP entries, as shown in Figure~\ref{pmpcfg-rv64}.
-For RV64, the odd-numbered configuration registers,
-{\tt pmpcfg1}, {\tt pmpcfg3}, \ldots, {\tt pmpcfg15}, are illegal.
-
-\begin{commentary}
-RV64 systems use {\tt pmpcfg2}, rather than {\tt pmpcfg1}, to hold
-configurations for PMP entries 8--15. This design reduces the cost of
-supporting multiple MXLEN values, since the configurations for PMP
-entries 8--11 appear in {\tt pmpcfg2}[31:0] for both RV32 and RV64.
-\end{commentary}
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}Y@{}Y@{}Y@{}Yl}
-\instbitrange{31}{24} &
-\instbitrange{23}{16} &
-\instbitrange{15}{8} &
-\instbitrange{7}{0} & \\
-\cline{1-4}
-\multicolumn{1}{|c|}{pmp3cfg} &
-\multicolumn{1}{c|}{pmp2cfg} &
-\multicolumn{1}{c|}{pmp1cfg} &
-\multicolumn{1}{c|}{pmp0cfg} &
-\tt pmpcfg0 \\
-\cline{1-4}
-8 & 8 & 8 & 8 & \\
-\instbitrange{31}{24} &
-\instbitrange{23}{16} &
-\instbitrange{15}{8} &
-\instbitrange{7}{0} & \\
-\cline{1-4}
-\multicolumn{1}{|c|}{pmp7cfg} &
-\multicolumn{1}{c|}{pmp6cfg} &
-\multicolumn{1}{c|}{pmp5cfg} &
-\multicolumn{1}{c|}{pmp4cfg} &
-\tt pmpcfg1 \\
-\cline{1-4}
-8 & 8 & 8 & 8 & \\
-~ \\
-\multicolumn{4}{c}{\Huge\vdots} & \ \\
-~ \\
-\instbitrange{31}{24} &
-\instbitrange{23}{16} &
-\instbitrange{15}{8} &
-\instbitrange{7}{0} & \\
-\cline{1-4}
-\multicolumn{1}{|c|}{pmp63cfg} &
-\multicolumn{1}{c|}{pmp62cfg} &
-\multicolumn{1}{c|}{pmp61cfg} &
-\multicolumn{1}{c|}{pmp60cfg} &
-\tt pmpcfg15 \\
-\cline{1-4}
-8 & 8 & 8 & 8 & \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{RV32 PMP configuration CSR layout.}
-\label{pmpcfg-rv32}
-\end{figure}
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}Y@{}Y@{}Y@{}Y@{}Y@{}Y@{}Y@{}Yl}
-\instbitrange{63}{56} &
-\instbitrange{55}{48} &
-\instbitrange{47}{40} &
-\instbitrange{39}{32} &
-\instbitrange{31}{24} &
-\instbitrange{23}{16} &
-\instbitrange{15}{8} &
-\instbitrange{7}{0} & \\
-\cline{1-8}
-\multicolumn{1}{|c|}{pmp7cfg} &
-\multicolumn{1}{c|}{pmp6cfg} &
-\multicolumn{1}{c|}{pmp5cfg} &
-\multicolumn{1}{c|}{pmp4cfg} &
-\multicolumn{1}{c|}{pmp3cfg} &
-\multicolumn{1}{c|}{pmp2cfg} &
-\multicolumn{1}{c|}{pmp1cfg} &
-\multicolumn{1}{c|}{pmp0cfg} &
-\tt pmpcfg0 \\
-\cline{1-8}
-8 & 8 & 8 & 8 & 8 & 8 & 8 & 8 & \\
-\instbitrange{63}{56} &
-\instbitrange{55}{48} &
-\instbitrange{47}{40} &
-\instbitrange{39}{32} &
-\instbitrange{31}{24} &
-\instbitrange{23}{16} &
-\instbitrange{15}{8} &
-\instbitrange{7}{0} & \\
-\cline{1-8}
-\multicolumn{1}{|c|}{pmp15cfg} &
-\multicolumn{1}{c|}{pmp14cfg} &
-\multicolumn{1}{c|}{pmp13cfg} &
-\multicolumn{1}{c|}{pmp12cfg} &
-\multicolumn{1}{c|}{pmp11cfg} &
-\multicolumn{1}{c|}{pmp10cfg} &
-\multicolumn{1}{c|}{pmp9cfg} &
-\multicolumn{1}{c|}{pmp8cfg} &
-\tt pmpcfg2 \\
-\cline{1-8}
-8 & 8 & 8 & 8 & 8 & 8 & 8 & 8 & \\
-~ \\
-\multicolumn{8}{c}{\Huge\vdots} & \ \\
-~ \\
-\instbitrange{63}{56} &
-\instbitrange{55}{48} &
-\instbitrange{47}{40} &
-\instbitrange{39}{32} &
-\instbitrange{31}{24} &
-\instbitrange{23}{16} &
-\instbitrange{15}{8} &
-\instbitrange{7}{0} & \\
-\cline{1-8}
-\multicolumn{1}{|c|}{pmp63cfg} &
-\multicolumn{1}{c|}{pmp62cfg} &
-\multicolumn{1}{c|}{pmp61cfg} &
-\multicolumn{1}{c|}{pmp60cfg} &
-\multicolumn{1}{c|}{pmp59cfg} &
-\multicolumn{1}{c|}{pmp58cfg} &
-\multicolumn{1}{c|}{pmp57cfg} &
-\multicolumn{1}{c|}{pmp56cfg} &
-\tt pmpcfg14 \\
-\cline{1-8}
-8 & 8 & 8 & 8 & 8 & 8 & 8 & 8 & \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{RV64 PMP configuration CSR layout.}
-\label{pmpcfg-rv64}
-\end{figure}
-
-The PMP address registers are CSRs named {\tt pmpaddr0}--{\tt pmpaddr63}.
-Each PMP address register encodes bits 33--2 of a 34-bit physical address for
-RV32, as shown in Figure~\ref{pmpaddr-rv32}. For RV64, each PMP address
-register encodes bits 55--2 of a 56-bit physical address, as shown in
-Figure~\ref{pmpaddr-rv64}. Not all physical address bits may be implemented,
-and so the {\tt pmpaddr} registers are \warl.
-
-\begin{commentary}
-The Sv32 page-based virtual-memory scheme described in Section~\ref{sec:sv32}
-supports 34-bit physical addresses for RV32, so the PMP scheme must support
-addresses wider than XLEN for RV32.
-The Sv39 and Sv48 page-based virtual-memory schemes described in
-Sections~\ref{sec:sv39} and~\ref{sec:sv48} support a 56-bit physical address
-space, so the RV64 PMP address registers impose the same limit.
-\end{commentary}
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{31}{0} \\
-\hline
-\multicolumn{1}{|c|}{address[33:2] (\warl)} \\
-\hline
-32 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{PMP address register format, RV32.}
-\label{pmpaddr-rv32}
-\end{figure}
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}F@{}J}
-\instbitrange{63}{54} &
-\instbitrange{53}{0} \\
-\hline
-\multicolumn{1}{|c|}{0 (\warl)} &
-\multicolumn{1}{c|}{address[55:2] (\warl)} \\
-\hline
-10 & 54 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{PMP address register format, RV64.}
-\label{pmpaddr-rv64}
-\end{figure}
-
-Figure~\ref{pmpcfg} shows the layout of a PMP configuration register. The R,
-W, and X bits, when set, indicate that the PMP entry permits read, write, and
-instruction execution, respectively. When one of these bits is clear, the
-corresponding access type is denied.
-The R, W, and X fields form a collective \warl\ field for which the
-combinations with R=0 and W=1 are reserved.
-The remaining two fields, A and L, are described in the following sections.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{YSSYYY}
-\instbit{7} &
-\instbitrange{6}{5} &
-\instbitrange{4}{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{L (\warl)} &
-\multicolumn{1}{c|}{0 (\warl)} &
-\multicolumn{1}{c|}{A (\warl)} &
-\multicolumn{1}{c|}{X (\warl)} &
-\multicolumn{1}{c|}{W (\warl)} &
-\multicolumn{1}{c|}{R (\warl)}
-\\
-\hline
-1 & 2 & 2 & 1 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{PMP configuration register format.}
-\label{pmpcfg}
-\end{figure}
-
-Attempting to fetch an instruction from a PMP region that does not have execute
-permissions raises an instruction access-fault exception. Attempting to execute
-a load or load-reserved instruction which accesses a physical address within
-a PMP region without read permissions raises a load access-fault exception.
-Attempting to execute a store, store-conditional,
-or AMO instruction which accesses a physical address within a PMP region without
-write permissions raises a store access-fault exception.
-
-If MXLEN is changed, the contents of the {\tt pmp{\em x}cfg} fields are
-preserved, but appear in the {\tt pmpcfg{\em y}} CSR prescribed by the new
-setting of MXLEN. For example, when MXLEN is changed from 64 to 32, {\tt
-pmp4cfg} moves from {\tt pmpcfg0}[39:32] to {\tt pmpcfg1}[7:0]. The {\tt
-pmpaddr} CSRs follow the usual CSR width modulation rules described in
-Section~\ref{sec:csrwidthmodulation}.
-
-\subsubsection*{Address Matching}
-
-The A field in a PMP entry's configuration register encodes the
-address-matching mode of the associated PMP address register. The encoding of
-this field is shown in Table~\ref{pmpcfg-a}. When A=0, this PMP entry is
-disabled and matches no addresses. Two other address-matching modes are
-supported: naturally aligned power-of-2 regions (NAPOT), including the special
-case of naturally aligned four-byte regions (NA4); and the top boundary of an
-arbitrary range (TOR). These modes support four-byte granularity.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|r|c|l|}
-\hline
-A & Name & Description \\
-\hline
-0 & OFF & Null region (disabled) \\
-1 & TOR & Top of range \\
-2 & NA4 & Naturally aligned four-byte region \\
-3 & NAPOT & Naturally aligned power-of-two region, $\ge$8 bytes \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Encoding of A field in PMP configuration registers.}
-\label{pmpcfg-a}
-\end{table*}
-
-NAPOT ranges make use of the low-order bits of the associated address register
-to encode the size of the range, as shown in Table~\ref{pmpcfg-napot}.
-
-\begin{table*}[h!]
-\begin{center}
- \begin{tabular}{|c|c|l|}
- \hline
- \tt pmpaddr & {\tt pmpcfg}.A & Match type and size \\
- \hline
- \tt yyyy...yyyy & NA4 & 4-byte NAPOT range \\
- \tt yyyy...yyy0 & NAPOT & 8-byte NAPOT range \\
- \tt yyyy...yy01 & NAPOT & 16-byte NAPOT range \\
- \tt yyyy...y011 & NAPOT & 32-byte NAPOT range \\
- \multicolumn{1}{|c|}{\ldots} & \ldots & \multicolumn{1}{|c|}{\ldots} \\
- \tt yy01...1111 & NAPOT & $2^{\text{XLEN}}$-byte NAPOT range \\
- \tt y011...1111 & NAPOT & $2^{\text{XLEN}+1}$-byte NAPOT range \\
- \tt 0111...1111 & NAPOT & $2^{\text{XLEN}+2}$-byte NAPOT range \\
- \tt 1111...1111 & NAPOT & $2^{\text{XLEN}+3}$-byte NAPOT range \\
- \hline
- \end{tabular}
-\end{center}
-\caption{NAPOT range encoding in PMP address and configuration registers.}
-\label{pmpcfg-napot}
-\end{table*}
-
-If TOR is selected, the associated address register forms the top of the
-address range, and the preceding PMP address register forms the bottom of the
-address range. If PMP entry $i$'s A field is set to TOR, the entry matches
-any address $y$ such that ${\tt pmpaddr}_{i-1}\leq y < {\tt pmpaddr}_i$
-(irrespective of the value of ${\tt pmpcfg}_{i-1}$).
-If
-PMP entry 0's A field is set to TOR, zero is used for the lower bound, and so
-it matches any address $y < {\tt pmpaddr}_0$.
-
-\begin{commentary}
-If ${\tt pmpaddr}_{i-1}\geq {\tt pmpaddr}_i$ and ${\tt pmpcfg}_i$.A=TOR,
-then PMP entry $i$ matches no addresses.
-\end{commentary}
-
-Although the PMP mechanism supports regions as small as four bytes, platforms
-may specify coarser PMP regions. In general, the PMP grain is $2^{G+2}$ bytes
-and must be the same across all PMP regions. When $G \geq 1$, the NA4 mode
-is not selectable. When $G \geq 2$ and ${\tt pmpcfg}_i$.A[1] is set, i.e.
-the mode is NAPOT, then bits ${\tt pmpaddr}_i$[G-2:0] read as all ones. When
-$G \geq 1$ and ${\tt pmpcfg}_i$.A[1] is clear, i.e. the mode is OFF or TOR,
-then bits ${\tt pmpaddr}_i$[G-1:0] read as all zeros. Bits ${\tt
-pmpaddr}_i$[G-1:0] do not affect the TOR address-matching logic.
-Although changing ${\tt pmpcfg}_i$.A[1] affects the value read from
-${\tt pmpaddr}_i$, it does not affect the underlying value stored in that
-register---in particular, ${\tt pmpaddr}_i$[G-1] retains its original value
-when ${\tt pmpcfg}_i$.A is changed from NAPOT to TOR/OFF then back to NAPOT.
-
-\begin{commentary}
-Software may determine the PMP granularity by writing zero to {\tt pmp0cfg},
-then writing all ones to {\tt pmpaddr0}, then reading back {\tt pmpaddr0}.
-If $G$ is the index of the least-significant bit set,
-the PMP granularity is $2^{G+2}$ bytes.
-\end{commentary}
-
-If the current XLEN is greater than MXLEN, the PMP address registers are
-zero-extended from MXLEN to XLEN bits for the purposes of address matching.
-
-\subsubsection*{Locking and Privilege Mode}
-
-The L bit indicates that the PMP entry is locked, i.e., writes to the
-configuration register and associated address registers are ignored. Locked
-PMP entries remain locked until the hart is reset. If PMP entry $i$ is
-locked, writes to {\tt pmp}$i${\tt cfg} and {\tt pmpaddr}$i$ are ignored.
-Additionally, if PMP entry~$i$ is locked and {\tt pmp}$i${\tt cfg}.A is set
-to TOR, writes to {\tt pmpaddr}$i$-1 are ignored.
-
-\begin{commentary}
-Setting the L bit locks the PMP entry even when the A field is set to OFF.
-\end{commentary}
-
-In addition to locking the PMP entry, the L bit indicates whether the R/W/X
-permissions are enforced on M-mode accesses. When the L bit is set, these
-permissions are enforced for all privilege modes. When the L bit is clear,
-any M-mode access matching the PMP entry will succeed; the R/W/X
-permissions apply only to S and U modes.
-
-\subsubsection*{Priority and Matching Logic}
-
-PMP entries are statically prioritized. The lowest-numbered PMP entry that
-matches any byte of an access determines whether that access succeeds or
-fails. The matching PMP entry must match all bytes of an access, or the
-access fails, irrespective of the L, R, W, and X bits. For example, if a PMP
-entry is configured to match the four-byte range {\tt 0xC}--{\tt 0xF}, then an
-8-byte access to the range {\tt 0x8}--{\tt 0xF} will fail, assuming that
-PMP entry is the highest-priority entry that matches those addresses.
-
-If a PMP entry matches all bytes of an access, then the L, R, W, and X bits
-determine whether the access succeeds or fails. If the L bit is clear and the
-privilege mode of the access is M, the access succeeds. Otherwise, if the
-L bit is set or the privilege mode of the access is S or U, then the access
-succeeds only if the R, W, or X bit corresponding to the access type is set.
-
-If no PMP entry matches an M-mode access, the access succeeds. If no PMP
-entry matches an S-mode or U-mode access, but at least one PMP entry is
-implemented, the access fails.
-
-\begin{commentary}
-If at least one PMP entry is implemented, but all PMP entries' A fields are
-set to OFF, then all S-mode and U-mode memory accesses will fail.
-\end{commentary}
-
-Failed accesses generate an instruction, load, or store access-fault exception. Note
-that a single instruction may generate multiple accesses, which may not be
-mutually atomic. An access-fault exception is generated if at least one access
-generated by an instruction fails, though other accesses generated by that
-instruction may succeed with visible side effects. Notably, instructions that
-reference virtual memory are decomposed into multiple accesses.
-
-On some implementations, misaligned loads, stores, and instruction fetches may
-also be decomposed into multiple accesses, some of which may succeed before an
-access-fault exception occurs. In particular, a portion of a misaligned store
-that passes the PMP check may become visible, even if another portion fails
-the PMP check. The same behavior may manifest for floating-point stores wider
-than XLEN bits (e.g., the FSD instruction in RV32D), even when the store
-address is naturally aligned.
-
-\subsection{Physical Memory Protection and Paging}
-\label{pmp-vmem}
-
-The Physical Memory Protection mechanism is designed to compose with the
-page-based virtual memory systems described in Chapter~\ref{supervisor}. When
-paging is enabled, instructions that access virtual memory may result in
-multiple physical-memory accesses, including implicit references to the page
-tables. The PMP checks apply to all of these accesses. The effective
-privilege mode for implicit page-table accesses is S.
-
-Implementations with virtual memory are permitted to perform address
-translations speculatively and earlier than required by an explicit memory
-access, and are permitted to cache them in address translation cache
-structures---including possibly caching the identity mappings from effective
-address to physical address used in Bare translation modes and M-mode. The
-PMP settings for the resulting physical address may be checked (and possibly
-cached) at any point between the address translation and the explicit memory
-access.
-Hence, when the PMP settings are modified, M-mode software must
-synchronize the PMP settings with the virtual memory system and
-any PMP or address-translation caches.
-This is
-accomplished by executing an SFENCE.VMA instruction with {\em rs1}={\tt x0}
-and {\em rs2}={\tt x0}, after the PMP CSRs are written.
-
-If page-based virtual memory is not implemented,
-memory accesses check the PMP settings synchronously, so no SFENCE.VMA is needed.
diff --git a/src/latex/preamble.tex b/src/latex/preamble.tex
deleted file mode 100644
index 41af9c0..0000000
--- a/src/latex/preamble.tex
+++ /dev/null
@@ -1,148 +0,0 @@
-% Package includes
-
-\usepackage{graphicx}
-\usepackage{geometry}
-\usepackage{array}
-\usepackage{colortbl}
-\usepackage[svgnames]{xcolor}
-
-\usepackage[colorlinks,citecolor=Navy,linkcolor=Navy]{hyperref}
-\usepackage{placeins}
-\usepackage{longtable}
-\usepackage{multirow}
-\usepackage{float}
-\usepackage{listings}
-\usepackage{comment}
-\usepackage{enumitem}
-\usepackage{verbatimbox}
-\usepackage{amsmath}
-
-\usepackage[olditem,oldenum]{paralist}
-
-% Setup margins
-
-\setlength{\topmargin}{-0.5in}
-\setlength{\textheight}{9in}
-\setlength{\oddsidemargin}{0in}
-\setlength{\evensidemargin}{0in}
-\setlength{\textwidth}{6.5in}
-
-% Useful macros
-
-\newcommand{\note}[1]{{\bf [ NOTE: #1 ]}}
-\newcommand{\fixme}[1]{{\bf [ FIXME: #1 ]}}
-\newcommand{\todo}[1]{\marginpar{\footnotesize #1}}
-
-\newcommand{\wunits}[2]{\mbox{#1\,#2}}
-\newcommand{\um}{\mbox{$\mu$m}}
-\newcommand{\xum}[1]{\wunits{#1}{\um}}
-\newcommand{\by}[2]{\mbox{#1$\times$#2}}
-\newcommand{\byby}[3]{\mbox{#1$\times$#2$\times$#3}}
-
-\newlength\savedwidth
-\newcommand\whline[1]{%
- \noalign{%
- \global\savedwidth\arrayrulewidth\global\arrayrulewidth 1.5pt%
- }%
- \cline{#1}%
- \noalign{\vskip\arrayrulewidth}%
- \noalign{\global\arrayrulewidth\savedwidth}%
-}
-
-% Custom list environments
-
-\newlist{tightlist}{itemize}{1}
-\setlist[tightlist]{label=\textbullet,nosep}
-
-\newenvironment{titledtightlist}[1]
-{\noindent
- ~~\textbf{#1}
- \begin{tightlist}}
-{\end{tightlist}}
-
-\newenvironment{commentary}
-{ \vspace{-1.5mm}
- \list{}{
- \topsep 0mm
- \partopsep 0mm
- \listparindent 1.5em
- \itemindent \listparindent
- \rightmargin \leftmargin
- \parsep 0mm
- }
- \item
- \small\em
- \noindent\nopagebreak\rule{\linewidth}{1pt}\par
- \noindent\ignorespaces
-}
-{\endlist}
-
-%\newenvironment{discussion}
-%{ \vspace{-1.5mm}
-% \list{}{
-% \topsep 0mm
-% \partopsep 0mm
-% \listparindent 1.5em
-% \itemindent \listparindent
-% \rightmargin \leftmargin
-% \parsep 0mm
-% }
-% \item
-% \small\em
-% \noindent\nopagebreak\rule{\linewidth}{1pt}\par
-% \noindent\textbf{Discussion:}
-%}
-%{\endlist}
-
-% Other commands and parameters
-
-\pagestyle{myheadings}
-\setlength{\parindent}{0in}
-\setlength{\parskip}{10pt}
-\sloppy
-\raggedbottom
-\clubpenalty=10000
-\widowpenalty=10000
-
-% Commands for register format figures.
-
-% New column types to use in tabular environment for instruction formats.
-% Allocate 0.18in per bit.
-\newcolumntype{I}{>{\centering\arraybackslash}p{0.18in}}
-% Two-bit centered column.
-\newcolumntype{W}{>{\centering\arraybackslash}p{0.36in}}
-% Three-bit centered column.
-\newcolumntype{F}{>{\centering\arraybackslash}p{0.54in}}
-% Four-bit centered column.
-\newcolumntype{Y}{>{\centering\arraybackslash}p{0.72in}}
-% Five-bit centered column.
-\newcolumntype{R}{>{\centering\arraybackslash}p{0.9in}}
-% Six-bit centered column.
-\newcolumntype{S}{>{\centering\arraybackslash}p{1.08in}}
-% Seven-bit centered column.
-\newcolumntype{O}{>{\centering\arraybackslash}p{1.26in}}
-% Eight-bit centered column.
-\newcolumntype{E}{>{\centering\arraybackslash}p{1.44in}}
-% Ten-bit centered column.
-\newcolumntype{T}{>{\centering\arraybackslash}p{1.8in}}
-% Twelve-bit centered column.
-\newcolumntype{M}{>{\centering\arraybackslash}p{2.2in}}
-% Sixteen-bit centered column.
-\newcolumntype{K}{>{\centering\arraybackslash}p{2.88in}}
-% Twenty-bit centered column.
-\newcolumntype{U}{>{\centering\arraybackslash}p{3.6in}}
-% Twenty-bit centered column.
-\newcolumntype{L}{>{\centering\arraybackslash}p{3.6in}}
-% Twenty-five-bit centered column.
-\newcolumntype{J}{>{\centering\arraybackslash}p{4.5in}}
-
-\newcommand{\instbit}[1]{\mbox{\scriptsize #1}}
-\newcommand{\instbitrange}[2]{~\instbit{#1} \hfill \instbit{#2}~}
-\newcommand{\reglabel}[1]{\hfill {\tt #1}\hfill\ }
-
-\newcommand{\wiri}{\textbf{WIRI}}
-\newcommand{\wpri}{\textbf{WPRI}}
-\newcommand{\wlrl}{\textbf{WLRL}}
-\newcommand{\warl}{\textbf{WARL}}
-
-\newcommand{\unspecified}{\textsc{unspecified}}
\ No newline at end of file
diff --git a/src/latex/priv-csrs.tex b/src/latex/priv-csrs.tex
deleted file mode 100644
index 64c4aa7..0000000
--- a/src/latex/priv-csrs.tex
+++ /dev/null
@@ -1,554 +0,0 @@
-\chapter{Control and Status Registers (CSRs)}
-\label{chap:priv-csrs}
-
-The SYSTEM major opcode is used to encode all privileged instructions
-in the RISC-V ISA.
-These can be divided into two main classes: those that atomically
-read-modify-write control and status registers (CSRs), which are defined in
-the Zicsr extension, and all other privileged instructions.
-The privileged architecture requires the Zicsr extension; which other
-privileged instructions are required depends on the privileged-architecture
-feature set.
-
-In addition to the unprivileged
-state described in Volume I of this manual, an implementation may
-contain additional CSRs, accessible by some subset of the privilege
-levels using the CSR instructions described in Volume~I.
-In this chapter, we map out the CSR address space. The following
-chapters describe the function of each of the CSRs according to
-privilege level, as well as the other privileged instructions which
-are generally closely associated with a particular privilege level.
-Note that although CSRs and instructions are associated with one
-privilege level, they are also accessible at all higher privilege
-levels.
-
-Standard CSRs do not have side effects on reads but may have side effects
-on writes.
-
-\section{CSR Address Mapping Conventions}
-
-The standard RISC-V ISA sets aside a 12-bit encoding space (csr[11:0])
-for up to 4,096 CSRs. By convention, the upper 4 bits of the CSR
-address (csr[11:8]) are used to encode the read and write
-accessibility of the CSRs according to privilege level as shown in
-Table~\ref{csrrwpriv}. The top two bits (csr[11:10]) indicate whether
-the register is read/write ({\tt 00}, {\tt 01}, or {\tt 10}) or
-read-only ({\tt 11}). The next two bits (csr[9:8]) encode the lowest
-privilege level that can access the CSR.
-
-\begin{commentary}
-The CSR address convention uses the upper bits of the CSR address to
-encode default access privileges. This simplifies error checking in
-the hardware and provides a larger CSR space, but does constrain the
-mapping of CSRs into the address space.
-
-Implementations might allow a more-privileged level to trap otherwise
-permitted CSR accesses by a less-privileged level to allow these
-accesses to be intercepted. This change should be transparent to the
-less-privileged software.
-\end{commentary}
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|c|c|c|c|l|}
-\hline
-\multicolumn{3}{|c|}{CSR Address} & Hex & \multicolumn{1}{c|}{Use and Accessibility}\\ \cline{1-3}
-[11:10] & [9:8] & [7:4] & & \\
-\hline
-\multicolumn{5}{|c|}{Unprivileged and User-Level CSRs} \\
-\hline
-\tt 00 &\tt 00 &\tt XXXX & \tt 0x000-0x0FF & Standard read/write \\
-\tt 01 &\tt 00 &\tt XXXX & \tt 0x400-0x4FF & Standard read/write \\
-\tt 10 &\tt 00 &\tt XXXX & \tt 0x800-0x8FF & Custom read/write \\
-\tt 11 &\tt 00 &\tt 0XXX & \tt 0xC00-0xC7F & Standard read-only \\
-\tt 11 &\tt 00 &\tt 10XX & \tt 0xC80-0xCBF & Standard read-only \\
-\tt 11 &\tt 00 &\tt 11XX & \tt 0xCC0-0xCFF & Custom read-only \\
-\hline
-\multicolumn{5}{|c|}{Supervisor-Level CSRs} \\
-\hline
-\tt 00 &\tt 01 &\tt XXXX & \tt 0x100-0x1FF & Standard read/write \\
-\tt 01 &\tt 01 &\tt 0XXX & \tt 0x500-0x57F & Standard read/write \\
-\tt 01 &\tt 01 &\tt 10XX & \tt 0x580-0x5BF & Standard read/write \\
-\tt 01 &\tt 01 &\tt 11XX & \tt 0x5C0-0x5FF & Custom read/write \\
-\tt 10 &\tt 01 &\tt 0XXX & \tt 0x900-0x97F & Standard read/write \\
-\tt 10 &\tt 01 &\tt 10XX & \tt 0x980-0x9BF & Standard read/write \\
-\tt 10 &\tt 01 &\tt 11XX & \tt 0x9C0-0x9FF & Custom read/write \\
-\tt 11 &\tt 01 &\tt 0XXX & \tt 0xD00-0xD7F & Standard read-only \\
-\tt 11 &\tt 01 &\tt 10XX & \tt 0xD80-0xDBF & Standard read-only \\
-\tt 11 &\tt 01 &\tt 11XX & \tt 0xDC0-0xDFF & Custom read-only \\
-\hline
-\multicolumn{5}{|c|}{Hypervisor and VS CSRs} \\
-\hline
-\tt 00 &\tt 10 &\tt XXXX & \tt 0x200-0x2FF & Standard read/write \\
-\tt 01 &\tt 10 &\tt 0XXX & \tt 0x600-0x67F & Standard read/write \\
-\tt 01 &\tt 10 &\tt 10XX & \tt 0x680-0x6BF & Standard read/write \\
-\tt 01 &\tt 10 &\tt 11XX & \tt 0x6C0-0x6FF & Custom read/write \\
-\tt 10 &\tt 10 &\tt 0XXX & \tt 0xA00-0xA7F & Standard read/write \\
-\tt 10 &\tt 10 &\tt 10XX & \tt 0xA80-0xABF & Standard read/write \\
-\tt 10 &\tt 10 &\tt 11XX & \tt 0xAC0-0xAFF & Custom read/write \\
-\tt 11 &\tt 10 &\tt 0XXX & \tt 0xE00-0xE7F & Standard read-only \\
-\tt 11 &\tt 10 &\tt 10XX & \tt 0xE80-0xEBF & Standard read-only \\
-\tt 11 &\tt 10 &\tt 11XX & \tt 0xEC0-0xEFF & Custom read-only \\
-\hline
-\multicolumn{5}{|c|}{Machine-Level CSRs} \\
-\hline
-\tt 00 &\tt 11 &\tt XXXX & \tt 0x300-0x3FF & Standard read/write \\
-\tt 01 &\tt 11 &\tt 0XXX & \tt 0x700-0x77F & Standard read/write \\
-\tt 01 &\tt 11 &\tt 100X & \tt 0x780-0x79F & Standard read/write \\
-\tt 01 &\tt 11 &\tt 1010 & \tt 0x7A0-0x7AF & Standard read/write debug CSRs \\
-\tt 01 &\tt 11 &\tt 1011 & \tt 0x7B0-0x7BF & Debug-mode-only CSRs \\
-\tt 01 &\tt 11 &\tt 11XX & \tt 0x7C0-0x7FF & Custom read/write \\
-\tt 10 &\tt 11 &\tt 0XXX & \tt 0xB00-0xB7F & Standard read/write \\
-\tt 10 &\tt 11 &\tt 10XX & \tt 0xB80-0xBBF & Standard read/write \\
-\tt 10 &\tt 11 &\tt 11XX & \tt 0xBC0-0xBFF & Custom read/write \\
-\tt 11 &\tt 11 &\tt 0XXX & \tt 0xF00-0xF7F & Standard read-only \\
-\tt 11 &\tt 11 &\tt 10XX & \tt 0xF80-0xFBF & Standard read-only \\
-\tt 11 &\tt 11 &\tt 11XX & \tt 0xFC0-0xFFF & Custom read-only \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Allocation of RISC-V CSR address ranges.}
-\label{csrrwpriv}
-\end{table*}
-
-Attempts to access a non-existent CSR raise an illegal instruction
-exception. Attempts to access a CSR without appropriate privilege
-level or to write a read-only register also raise illegal instruction
-exceptions. A read/write register might also contain some bits that
-are read-only, in which case writes to the read-only bits are ignored.
-
-Table~\ref{csrrwpriv} also indicates the convention to allocate CSR
-addresses between standard and custom uses. The CSR addresses
-designated for custom uses will not be redefined by future
-standard extensions.
-
-Machine-mode standard read/write CSRs {\tt 0x7A0}--{\tt 0x7BF} are reserved
-for use by the debug system. Of these CSRs, {\tt 0x7A0}--{\tt 0x7AF} are
-accessible to machine mode, whereas {\tt 0x7B0}--{\tt 0x7BF} are only visible
-to debug mode. Implementations should raise illegal instruction exceptions on
-machine-mode access to the latter set of registers.
-
-\begin{commentary}
-Effective virtualization requires that as many instructions run natively as
-possible inside a virtualized environment, while any privileged accesses trap
-to the virtual machine monitor~\cite{goldbergvm}. CSRs that are read-only at
-some lower privilege level are shadowed into separate CSR addresses if they
-are made read-write at a higher privilege level. This avoids trapping
-permitted lower-privilege accesses while still causing traps on illegal
-accesses. Currently, the counters are the only shadowed CSRs.
-\end{commentary}
-
-\section{CSR Listing}
-
-Tables~\ref{ucsrnames}--\ref{mcsrnames1} list the CSRs that have
-currently been allocated CSR addresses. The timers, counters, and
-floating-point CSRs are standard unprivileged CSRs.
-The other
-registers are used by privileged code, as described in the following
-chapters. Note that not all registers are required on all
-implementations.
-
-\begin{table}[htb!]
-\begin{center}
-\begin{tabular}{|l|l|l|l|}
-\hline
-Number & Privilege & Name & Description \\
-\hline
-\multicolumn{4}{|c|}{Unprivileged Floating-Point CSRs} \\
-\hline
-\tt 0x001 & URW &\tt fflags & Floating-Point Accrued Exceptions. \\
-\tt 0x002 & URW &\tt frm & Floating-Point Dynamic Rounding Mode. \\
-\tt 0x003 & URW &\tt fcsr & Floating-Point Control and Status
-Register ({\tt frm} + {\tt fflags}). \\
-\hline
-\multicolumn{4}{|c|}{Unprivileged Counter/Timers} \\
-\hline
-\tt 0xC00 & URO &\tt cycle & Cycle counter for RDCYCLE instruction. \\
-\tt 0xC01 & URO &\tt time & Timer for RDTIME instruction. \\
-\tt 0xC02 & URO &\tt instret & Instructions-retired counter for RDINSTRET instruction. \\
-\tt 0xC03 & URO &\tt hpmcounter3 & Performance-monitoring counter. \\
-\tt 0xC04 & URO &\tt hpmcounter4 & Performance-monitoring counter. \\
-& & \multicolumn{1}{c|}{\vdots} & \ \\
-\tt 0xC1F & URO &\tt hpmcounter31 & Performance-monitoring counter. \\
-\tt 0xC80 & URO &\tt cycleh & Upper 32 bits of {\tt cycle}, RV32 only. \\
-\tt 0xC81 & URO &\tt timeh & Upper 32 bits of {\tt time}, RV32 only. \\
-\tt 0xC82 & URO &\tt instreth & Upper 32 bits of {\tt instret}, RV32 only. \\
-\tt 0xC83 & URO &\tt hpmcounter3h & Upper 32 bits of {\tt hpmcounter3}, RV32 only. \\
-\tt 0xC84 & URO &\tt hpmcounter4h & Upper 32 bits of {\tt hpmcounter4}, RV32 only. \\
-& & \multicolumn{1}{c|}{\vdots} & \ \\
-\tt 0xC9F & URO &\tt hpmcounter31h & Upper 32 bits of {\tt hpmcounter31}, RV32 only. \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Currently allocated RISC-V unprivileged CSR addresses.}
-\label{ucsrnames}
-\end{table}
-
-\begin{table}[htb!]
-\begin{center}
-\begin{tabular}{|l|l|l|l|}
-\hline
-Number & Privilege & Name & Description \\
-\hline
-\multicolumn{4}{|c|}{Supervisor Trap Setup} \\
-\hline
-\tt 0x100 & SRW &\tt sstatus & Supervisor status register. \\
-\tt 0x104 & SRW &\tt sie & Supervisor interrupt-enable register. \\
-\tt 0x105 & SRW &\tt stvec & Supervisor trap handler base address. \\
-\tt 0x106 & SRW &\tt scounteren & Supervisor counter enable. \\
-\hline
-\multicolumn{4}{|c|}{Supervisor Configuration} \\
-\hline
-\tt 0x10A & SRW &\tt senvcfg & Supervisor environment configuration register. \\
-\hline
-\multicolumn{4}{|c|}{Supervisor Trap Handling} \\
-\hline
-\tt 0x140 & SRW &\tt sscratch & Scratch register for supervisor trap handlers. \\
-\tt 0x141 & SRW &\tt sepc & Supervisor exception program counter. \\
-\tt 0x142 & SRW &\tt scause & Supervisor trap cause. \\
-\tt 0x143 & SRW &\tt stval & Supervisor bad address or instruction. \\
-\tt 0x144 & SRW &\tt sip & Supervisor interrupt pending. \\
-\hline
-\multicolumn{4}{|c|}{Supervisor Protection and Translation} \\
-\hline
-\tt 0x180 & SRW &\tt satp & Supervisor address translation and protection. \\
-\hline
-\multicolumn{4}{|c|}{Debug/Trace Registers} \\
-\hline
-\tt 0x5A8 & SRW &\tt scontext & Supervisor-mode context register. \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Currently allocated RISC-V supervisor-level CSR addresses.}
-\label{scsrnames}
-\end{table}
-
-\begin{table}[htb!]
-\begin{center}
-\begin{tabular}{|l|l|l|l|}
-\hline
-Number & Privilege & Name & Description \\
-\hline
-\multicolumn{4}{|c|}{Hypervisor Trap Setup} \\
-\hline
-\hline
-\tt 0x600 & HRW &\tt hstatus & Hypervisor status register. \\
-\tt 0x602 & HRW &\tt hedeleg & Hypervisor exception delegation register. \\
-\tt 0x603 & HRW &\tt hideleg & Hypervisor interrupt delegation register. \\
-\tt 0x604 & HRW &\tt hie & Hypervisor interrupt-enable register. \\
-\tt 0x606 & HRW &\tt hcounteren & Hypervisor counter enable. \\
-\tt 0x607 & HRW &\tt hgeie & Hypervisor guest external interrupt-enable register. \\
-\hline
-\multicolumn{4}{|c|}{Hypervisor Trap Handling} \\
-\hline
-\tt 0x643 & HRW &\tt htval & Hypervisor bad guest physical address. \\
-\tt 0x644 & HRW &\tt hip & Hypervisor interrupt pending. \\
-\tt 0x645 & HRW &\tt hvip & Hypervisor virtual interrupt pending. \\
-\tt 0x64A & HRW &\tt htinst & Hypervisor trap instruction (transformed). \\
-\tt 0xE12 & HRO &\tt hgeip & Hypervisor guest external interrupt pending. \\
-\hline
-\multicolumn{4}{|c|}{Hypervisor Configuration} \\
-\hline
-\tt 0x60A & HRW &\tt henvcfg & Hypervisor environment configuration register. \\
-\tt 0x61A & HRW &\tt henvcfgh & Additional hypervisor env. conf. register, RV32 only. \\
-\hline
-\multicolumn{4}{|c|}{Hypervisor Protection and Translation} \\
-\hline
-\tt 0x680 & HRW &\tt hgatp & Hypervisor guest address translation and protection. \\
-\hline
-\multicolumn{4}{|c|}{Debug/Trace Registers} \\
-\hline
-\tt 0x6A8 & HRW &\tt hcontext & Hypervisor-mode context register. \\
-\hline
-\multicolumn{4}{|c|}{Hypervisor Counter/Timer Virtualization Registers} \\
-\hline
-\tt 0x605 & HRW &\tt htimedelta & Delta for VS/VU-mode timer. \\
-\tt 0x615 & HRW &\tt htimedeltah & Upper 32 bits of {\tt htimedelta}, HSXLEN=32 only. \\
-\hline
-\multicolumn{4}{|c|}{Virtual Supervisor Registers} \\
-\hline
-\tt 0x200 & HRW &\tt vsstatus & Virtual supervisor status register. \\
-\tt 0x204 & HRW &\tt vsie & Virtual supervisor interrupt-enable register. \\
-\tt 0x205 & HRW &\tt vstvec & Virtual supervisor trap handler base address. \\
-\tt 0x240 & HRW &\tt vsscratch & Virtual supervisor scratch register. \\
-\tt 0x241 & HRW &\tt vsepc & Virtual supervisor exception program counter. \\
-\tt 0x242 & HRW &\tt vscause & Virtual supervisor trap cause. \\
-\tt 0x243 & HRW &\tt vstval & Virtual supervisor bad address or instruction. \\
-\tt 0x244 & HRW &\tt vsip & Virtual supervisor interrupt pending. \\
-\tt 0x280 & HRW &\tt vsatp & Virtual supervisor address translation and protection. \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Currently allocated RISC-V hypervisor and VS CSR addresses.}
-\label{hcsrnames}
-\end{table}
-
-
-\begin{table}[htb!]
-\begin{center}
-\begin{tabular}{|l|l|l|l|}
-\hline
-Number & Privilege & Name & Description \\
-\hline
-\multicolumn{4}{|c|}{Machine Information Registers} \\
-\hline
-\tt 0xF11 & MRO &\tt mvendorid & Vendor ID. \\
-\tt 0xF12 & MRO &\tt marchid & Architecture ID. \\
-\tt 0xF13 & MRO &\tt mimpid & Implementation ID. \\
-\tt 0xF14 & MRO &\tt mhartid & Hardware thread ID. \\
-\tt 0xF15 & MRO &\tt mconfigptr & Pointer to configuration data structure. \\
-\hline
-\multicolumn{4}{|c|}{Machine Trap Setup} \\
-\hline
-\tt 0x300 & MRW &\tt mstatus & Machine status register. \\
-\tt 0x301 & MRW &\tt misa & ISA and extensions. \\
-\tt 0x302 & MRW &\tt medeleg & Machine exception delegation register. \\
-\tt 0x303 & MRW &\tt mideleg & Machine interrupt delegation register. \\
-\tt 0x304 & MRW &\tt mie & Machine interrupt-enable register. \\
-\tt 0x305 & MRW &\tt mtvec & Machine trap-handler base address. \\
-\tt 0x306 & MRW &\tt mcounteren & Machine counter enable. \\
-\tt 0x310 & MRW &\tt mstatush & Additional machine status register, RV32 only. \\
-\hline
-\multicolumn{4}{|c|}{Machine Trap Handling} \\
-\hline
-\tt 0x340 & MRW &\tt mscratch & Scratch register for machine trap handlers. \\
-\tt 0x341 & MRW &\tt mepc & Machine exception program counter. \\
-\tt 0x342 & MRW &\tt mcause & Machine trap cause. \\
-\tt 0x343 & MRW &\tt mtval & Machine bad address or instruction. \\
-\tt 0x344 & MRW &\tt mip & Machine interrupt pending. \\
-\tt 0x34A & MRW &\tt mtinst & Machine trap instruction (transformed). \\
-\tt 0x34B & MRW &\tt mtval2 & Machine bad guest physical address. \\
-\hline
-\multicolumn{4}{|c|}{Machine Configuration} \\
-\hline
-\tt 0x30A & MRW &\tt menvcfg & Machine environment configuration register. \\
-\tt 0x31A & MRW &\tt menvcfgh & Additional machine env. conf. register, RV32 only. \\
-\tt 0x747 & MRW &\tt mseccfg & Machine security configuration register. \\
-\tt 0x757 & MRW &\tt mseccfgh & Additional machine security conf. register, RV32 only. \\
-\hline
-\multicolumn{4}{|c|}{Machine Memory Protection} \\
-\hline
-%\tt 0x380 & MRW &\tt mbase & Base register. \\
-%\tt 0x381 & MRW &\tt mbound & Bound register. \\
-%\tt 0x382 & MRW &\tt mibase & Instruction base register. \\
-%\tt 0x383 & MRW &\tt mibound & Instruction bound register. \\
-%\tt 0x384 & MRW &\tt mdbase & Data base register. \\
-%\tt 0x385 & MRW &\tt mdbound & Data bound register. \\
-\tt 0x3A0 & MRW &\tt pmpcfg0 & Physical memory protection configuration. \\
-\tt 0x3A1 & MRW &\tt pmpcfg1 & Physical memory protection configuration, RV32 only. \\
-\tt 0x3A2 & MRW &\tt pmpcfg2 & Physical memory protection configuration. \\
-\tt 0x3A3 & MRW &\tt pmpcfg3 & Physical memory protection configuration, RV32 only. \\
-& & \multicolumn{1}{c|}{\vdots} & \ \\
-\tt 0x3AE & MRW &\tt pmpcfg14 & Physical memory protection configuration. \\
-\tt 0x3AF & MRW &\tt pmpcfg15 & Physical memory protection configuration, RV32 only. \\
-\tt 0x3B0 & MRW &\tt pmpaddr0 & Physical memory protection address register. \\
-\tt 0x3B1 & MRW &\tt pmpaddr1 & Physical memory protection address register. \\
-& & \multicolumn{1}{c|}{\vdots} & \ \\
-\tt 0x3EF & MRW &\tt pmpaddr63 & Physical memory protection address register. \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Currently allocated RISC-V machine-level CSR addresses.}
-\label{mcsrnames0}
-\end{table}
-
-\begin{table}[htb!]
-\begin{center}
-\begin{tabular}{|l|l|l|l|}
-\hline
-Number & Privilege & Name & Description \\
-\hline
-\multicolumn{4}{|c|}{Machine Non-Maskable Interrupt Handling} \\
-\hline
-\tt 0x740 & MRW &\tt mnscratch & Resumable NMI scratch register. \\
-\tt 0x741 & MRW &\tt mnepc & Resumable NMI program counter. \\
-\tt 0x742 & MRW &\tt mncause & Resumable NMI cause. \\
-\tt 0x744 & MRW &\tt mnstatus & Resumable NMI status. \\
-\hline
-\multicolumn{4}{|c|}{Machine Counter/Timers} \\
-\hline
-\tt 0xB00 & MRW &\tt mcycle & Machine cycle counter. \\
-\tt 0xB02 & MRW &\tt minstret & Machine instructions-retired counter. \\
-\tt 0xB03 & MRW &\tt mhpmcounter3 & Machine performance-monitoring counter. \\
-\tt 0xB04 & MRW &\tt mhpmcounter4 & Machine performance-monitoring counter. \\
-& & \multicolumn{1}{c|}{\vdots} & \ \\
-\tt 0xB1F & MRW &\tt mhpmcounter31 & Machine performance-monitoring counter. \\
-\tt 0xB80 & MRW &\tt mcycleh & Upper 32 bits of {\tt mcycle}, RV32 only. \\
-\tt 0xB82 & MRW &\tt minstreth & Upper 32 bits of {\tt minstret}, RV32 only. \\
-\tt 0xB83 & MRW &\tt mhpmcounter3h & Upper 32 bits of {\tt mhpmcounter3}, RV32 only. \\
-\tt 0xB84 & MRW &\tt mhpmcounter4h & Upper 32 bits of {\tt mhpmcounter4}, RV32 only. \\
-& & \multicolumn{1}{c|}{\vdots} & \ \\
-\tt 0xB9F & MRW &\tt mhpmcounter31h & Upper 32 bits of {\tt mhpmcounter31}, RV32 only. \\
-\hline
-\multicolumn{4}{|c|}{Machine Counter Setup} \\
-\hline
-\tt 0x320 & MRW &\tt mcountinhibit & Machine counter-inhibit register. \\
-\tt 0x323 & MRW &\tt mhpmevent3 & Machine performance-monitoring event selector. \\
-\tt 0x324 & MRW &\tt mhpmevent4 & Machine performance-monitoring event selector. \\
-& & \multicolumn{1}{c|}{\vdots} & \ \\
-\tt 0x33F & MRW &\tt mhpmevent31 & Machine performance-monitoring event selector. \\
-\hline
-\multicolumn{4}{|c|}{Debug/Trace Registers (shared with Debug Mode)} \\
-\hline
-\tt 0x7A0 & MRW &\tt tselect & Debug/Trace trigger register select. \\
-\tt 0x7A1 & MRW &\tt tdata1 & First Debug/Trace trigger data register. \\
-\tt 0x7A2 & MRW &\tt tdata2 & Second Debug/Trace trigger data register. \\
-\tt 0x7A3 & MRW &\tt tdata3 & Third Debug/Trace trigger data register. \\
-\tt 0x7A8 & MRW &\tt mcontext & Machine-mode context register. \\
-\hline
-\multicolumn{4}{|c|}{Debug Mode Registers } \\
-\hline
-\tt 0x7B0 & DRW &\tt dcsr & Debug control and status register. \\
-\tt 0x7B1 & DRW &\tt dpc & Debug program counter. \\
-\tt 0x7B2 & DRW &\tt dscratch0 & Debug scratch register 0. \\
-\tt 0x7B3 & DRW &\tt dscratch1 & Debug scratch register 1. \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Currently allocated RISC-V machine-level CSR addresses.}
-\label{mcsrnames1}
-\end{table}
-
-\clearpage
-
-\section{CSR Field Specifications}
-
-
-The following definitions and abbreviations are used in specifying the
-behavior of fields within the CSRs.
-
-\subsection*{Reserved Writes Preserve Values, Reads Ignore Values (WPRI)}
-
-Some whole read/write fields are reserved for future use. Software
-should ignore the values read from these fields, and should preserve
-the values held in these fields when writing values to other fields of
-the same register.
-For forward compatibility, implementations that do not furnish these fields
-must make them read-only zero.
-These fields are labeled \wpri\ in the register descriptions.
-
-\begin{commentary}
-To simplify the software model, any backward-compatible future
-definition of previously reserved fields within a CSR must cope with
-the possibility that a non-atomic read/modify/write sequence is used
-to update other fields in the CSR. Alternatively, the original CSR
-definition must specify that subfields can only be updated atomically,
-which may require a two-instruction clear bit/set bit sequence in
-general that can be problematic if intermediate values are not legal.
-\end{commentary}
-
-\subsection*{Write/Read Only Legal Values (WLRL)}
-
-Some read/write CSR fields specify behavior for only a subset of
-possible bit encodings, with other bit encodings reserved. Software
-should not write anything other than legal values to such a field, and
-should not assume a read will return a legal value unless the last
-write was of a legal value, or the register has not been written since
-another operation (e.g., reset) set the register to a legal value.
-These fields are labeled \wlrl\ in the register descriptions.
-
-\begin{commentary}
-Hardware implementations need only implement enough state bits to
-differentiate between the supported values, but must always return the
-complete specified bit-encoding of any supported value when read.
-\end{commentary}
-
-Implementations are permitted but not required to raise an illegal
-instruction exception if an instruction attempts to write a
-non-supported value to a \wlrl\ field. Implementations can
-return arbitrary bit patterns on the read of a \wlrl\ field when the last
-write was of an illegal value, but the value returned should
-deterministically depend on the illegal written value and
-the value of the field prior to the write.
-
-\subsection*{Write Any Values, Reads Legal Values (WARL)}
-
-Some read/write CSR fields are only defined for a subset of bit
-encodings, but allow any value to be written while guaranteeing to
-return a legal value whenever read. Assuming that writing the CSR has
-no other side effects, the range of supported values can be determined
-by attempting to write a desired setting then reading to see if the
-value was retained. These fields are labeled \warl\ in the register
-descriptions.
-
-Implementations will not raise an exception on writes of unsupported
-values to a \warl\ field. Implementations can
-return any legal value on the read of a \warl\ field when the last
-write was of an illegal value, but the legal value returned should
-deterministically depend on the illegal written value and
-the architectural state of the hart.
-
-\section{CSR Field Modulation}
-
-If a write to one CSR changes the set of legal values allowed for a
-field of a second CSR, then unless specified otherwise, the second
-CSR's field immediately gets an \unspecified\ value from among its new
-legal values.
-This is true even if the field's value before the write remains legal
-after the write;
-the value of the field may be changed in consequence of the write to
-the controlling CSR.
-
-\begin{commentary}
-As a special case of this rule, the value written to one CSR may
-control whether a field of a second CSR is writable (with multiple
-legal values) or is read-only.
-When a write to the controlling CSR causes the second CSR's field
-to change from previously read-only to now writable, that field
-immediately gets an \unspecified\ but legal value, unless specified
-otherwise.
-\end{commentary}
-
-\begin{commentary}
-Some CSR fields are, when writable, defined as aliases of other CSR
-fields.
-Let $x$ be such a CSR field, and let $y$ be the CSR field it aliases
-when writable.
-If a write to a controlling CSR causes field $x$ to change from
-previously read-only to now writable, the new value of $x$ is not
-\unspecified\ but instead immediately reflects the existing value of its
-alias~$y$, as required.
-\end{commentary}
-
-A change to the value of a CSR for this reason is not a write to the
-affected CSR and thus does not trigger any side effects specified for
-that CSR.
-
-\section{Implicit Reads of CSRs}
-
-Implementations sometimes perform {\em implicit} reads of CSRs.
-(For example, all S-mode instruction fetches implicitly read the {\tt satp}
-CSR.)
-Unless otherwise specified, the value returned by an implicit read of a CSR
-is the same value that would have been returned by an explicit read of the
-CSR, using a CSR-access instruction in a sufficient privilege mode.
-
-\section{CSR Width Modulation}
-\label{sec:csrwidthmodulation}
-
-If the width of a CSR is changed (for example, by changing MXLEN or UXLEN, as
-described in Section~\ref{xlen-control}), the values of the {\em writable}
-fields and bits of the new-width CSR are, unless specified otherwise,
-determined from the previous-width CSR as though by this algorithm:
-
-\begin{enumerate}
-
-\item The value of the previous-width CSR is copied to a temporary register of
-the same width.
-
-\item For the read-only bits of the previous-width CSR, the bits at the same
-positions in the temporary register are set to zeros.
-
-\item The width of the temporary register is changed to the new width. If the
-new width $W$ is narrower than the previous width, the least-significant $W$
-bits of the temporary register are retained and the more-significant bits are
-discarded. If the new width is wider than the previous width, the temporary
-register is zero-extended to the wider width.
-
-\item Each writable field of the new-width CSR takes the value of the bits at
-the same positions in the temporary register.
-
-\end{enumerate}
-
-Changing the width of a CSR is not a read or write of the CSR and thus
-does not trigger any side effects.
diff --git a/src/latex/priv-history.tex b/src/latex/priv-history.tex
deleted file mode 100644
index fadcdc2..0000000
--- a/src/latex/priv-history.tex
+++ /dev/null
@@ -1,29 +0,0 @@
-\chapter{History}
-
-\section{Research Funding at UC Berkeley}
-
-Development of the RISC-V architecture and implementations has been
-partially funded by the following sponsors.
-
-\begin{itemize}
-\item {\bf Par Lab:} Research supported by Microsoft (Award \#024263)
- and Intel (Award \#024894) funding and by matching funding by
- U.C. Discovery (Award \#DIG07-10227). Additional support came from
- Par Lab affiliates Nokia, NVIDIA, Oracle, and Samsung.
-
-\item {\bf Project Isis:} DoE Award DE-SC0003624.
-
-\item {\bf ASPIRE Lab:} DARPA PERFECT program, Award HR0011-12-2-0016.
- DARPA POEM program, Award HR0011-11-C-0100. The Center for Future
- Architectures Research (C-FAR), a STARnet center funded by the
- Semiconductor Research Corporation. Additional support from ASPIRE
- industrial sponsor, Intel, and ASPIRE affiliates, Google, Huawei,
- Nokia, NVIDIA, Oracle, and Samsung.
-
-\end{itemize}
-
-The content of this paper does not necessarily reflect the position or the
-policy of the US government and no official endorsement should be
-inferred.
-
-
diff --git a/src/latex/priv-insns.tex b/src/latex/priv-insns.tex
deleted file mode 100644
index cbfdd70..0000000
--- a/src/latex/priv-insns.tex
+++ /dev/null
@@ -1,9 +0,0 @@
-\chapter{RISC-V Privileged Instruction Set Listings}
-
-This chapter presents instruction-set listings for all instructions
-defined in the RISC-V Privileged Architecture.
-
-The instruction-set listings for unprivileged instructions, including the
-ECALL and EBREAK instructions, are provided in Volume I of this manual.
-
-\input{priv-instr-table}
diff --git a/src/latex/priv-instr-table.tex b/src/latex/priv-instr-table.tex
deleted file mode 100644
index adac306..0000000
--- a/src/latex/priv-instr-table.tex
+++ /dev/null
@@ -1,344 +0,0 @@
-\newpage
-
-\begin{table}[p]
-\begin{small}
-\begin{center}
-\begin{tabular}{p{0in}p{0.4in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.4in}p{0.6in}p{0.4in}p{0.6in}p{0.7in}l}
-& & & & & & & & & & \\
- &
-\multicolumn{1}{l}{\instbit{31}} &
-\multicolumn{1}{r}{\instbit{27}} &
-\instbit{26} &
-\instbit{25} &
-\multicolumn{1}{l}{\instbit{24}} &
-\multicolumn{1}{r}{\instbit{20}} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{funct7} &
-\multicolumn{2}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} & R-type \\
-\cline{2-11}
-
-
-&
-\multicolumn{6}{|c|}{imm[11:0]} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} & I-type \\
-\cline{2-11}
-
-
-&
-\multicolumn{10}{c}{} & \\
-&
-\multicolumn{10}{c}{\bf Trap-Return Instructions} & \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0001000} &
-\multicolumn{2}{c|}{00010} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{000} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & SRET \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0011000} &
-\multicolumn{2}{c|}{00010} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{000} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & MRET \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0111000} &
-\multicolumn{2}{c|}{00010} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{000} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & MNRET \\
-\cline{2-11}
-
-
-&
-\multicolumn{10}{c}{} & \\
-&
-\multicolumn{10}{c}{\bf Interrupt-Management Instructions} & \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0001000} &
-\multicolumn{2}{c|}{00101} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{000} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & WFI \\
-\cline{2-11}
-
-
-&
-\multicolumn{10}{c}{} & \\
-&
-\multicolumn{10}{c}{\bf Supervisor Memory-Management Instructions} & \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0001001} &
-\multicolumn{2}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{000} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & SFENCE.VMA \\
-\cline{2-11}
-
-
-&
-\multicolumn{10}{c}{} & \\
-&
-\multicolumn{10}{c}{\bf Hypervisor Memory-Management Instructions} & \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0010001} &
-\multicolumn{2}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{000} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & HFENCE.VVMA \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110001} &
-\multicolumn{2}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{000} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & HFENCE.GVMA \\
-\cline{2-11}
-
-
-&
-\multicolumn{10}{c}{} & \\
-&
-\multicolumn{10}{c}{\bf Hypervisor Virtual-Machine Load and Store Instructions} & \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110000} &
-\multicolumn{2}{c|}{00000} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{100} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{1110011} & HLV.B \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110000} &
-\multicolumn{2}{c|}{00001} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{100} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{1110011} & HLV.BU \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110010} &
-\multicolumn{2}{c|}{00000} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{100} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{1110011} & HLV.H \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110010} &
-\multicolumn{2}{c|}{00001} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{100} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{1110011} & HLV.HU \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110100} &
-\multicolumn{2}{c|}{00000} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{100} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{1110011} & HLV.W \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110010} &
-\multicolumn{2}{c|}{00011} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{100} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{1110011} & HLVX.HU \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110100} &
-\multicolumn{2}{c|}{00011} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{100} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{1110011} & HLVX.WU \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110001} &
-\multicolumn{2}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{100} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & HSV.B \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110011} &
-\multicolumn{2}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{100} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & HSV.H \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110101} &
-\multicolumn{2}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{100} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & HSV.W \\
-\cline{2-11}
-
-
-&
-\multicolumn{10}{c}{} & \\
-&
-\multicolumn{10}{c}{\bf Hypervisor Virtual-Machine Load and Store Instructions, RV64 only} & \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110100} &
-\multicolumn{2}{c|}{00001} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{100} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{1110011} & HLV.WU \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110110} &
-\multicolumn{2}{c|}{00000} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{100} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{1110011} & HLV.D \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110111} &
-\multicolumn{2}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{100} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & HSV.D \\
-\cline{2-11}
-
-
-&
-\multicolumn{10}{c}{} & \\
-&
-\multicolumn{10}{c}{\bf \emph{Svinval} Memory-Management Extension} & \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0001011} &
-\multicolumn{2}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{000} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & SINVAL.VMA \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0001100} &
-\multicolumn{2}{c|}{00000} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{000} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & SFENCE.W.INVAL \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0001100} &
-\multicolumn{2}{c|}{00001} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{000} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & SFENCE.INVAL.IR \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0010011} &
-\multicolumn{2}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{000} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & HINVAL.VVMA \\
-\cline{2-11}
-
-
-&
-\multicolumn{4}{|c|}{0110011} &
-\multicolumn{2}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{000} &
-\multicolumn{1}{c|}{00000} &
-\multicolumn{1}{c|}{1110011} & HINVAL.GVMA \\
-\cline{2-11}
-
-
-\end{tabular}
-\end{center}
-\end{small}
-\caption{RISC-V Privileged Instructions}
-\end{table}
\ No newline at end of file
diff --git a/src/latex/priv-intro.tex b/src/latex/priv-intro.tex
deleted file mode 100644
index f909ea6..0000000
--- a/src/latex/priv-intro.tex
+++ /dev/null
@@ -1,210 +0,0 @@
-\chapter{Introduction}
-
-This document describes the RISC-V privileged architecture, which
-covers all aspects of RISC-V systems beyond the unprivileged ISA,
-including privileged instructions as well as additional functionality
-required for running operating systems and attaching external devices.
-
-\begin{commentary}
-Commentary on our design decisions is formatted as in this paragraph,
-and can be skipped if the reader is only interested in the
-specification itself.
-\end{commentary}
-
-\begin{commentary}
-We briefly note that the entire privileged-level design described in
-this document could be replaced with an entirely different
-privileged-level design without changing the unprivileged ISA, and
-possibly without even changing the ABI. In particular, this
-privileged specification was designed to run existing popular
-operating systems, and so embodies the conventional level-based
-protection model. Alternate privileged specifications could embody
-other more flexible protection-domain models. For simplicity of
-expression, the text is written as if this were the only possible
-privileged architecture.
-\end{commentary}
-
-\section{RISC-V Privileged Software Stack Terminology}
-
-This section describes the terminology we use to describe components
-of the wide range of possible privileged software stacks for RISC-V.
-
-Figure~\ref{fig:privimps} shows some of the possible software stacks
-that can be supported by the RISC-V architecture. The left-hand side
-shows a simple system that supports only a single application running
-on an application execution environment (AEE). The application is
-coded to run with a particular application binary interface (ABI).
-The ABI includes the supported user-level ISA plus a set of ABI calls to
-interact with the AEE. The ABI hides details of the AEE from the
-application to allow greater flexibility in implementing the AEE. The
-same ABI could be implemented natively on multiple different host OSs,
-or could be supported by a user-mode emulation environment running on
-a machine with a different native ISA.
-
-\begin{figure}[th]
-\centering
-\includegraphics[width=\textwidth]{figs/privimps.pdf}
-\caption{Different implementation stacks supporting various forms of
- privileged execution.}
-\label{fig:privimps}
-\end{figure}
-
-\begin{commentary}
-Our graphical convention represents abstract interfaces using black
-boxes with white text, to separate them from concrete instances of
-components implementing the interfaces.
-\end{commentary}
-
-The middle configuration shows a conventional operating system (OS)
-that can support multiprogrammed execution of multiple
-applications. Each application communicates over an ABI with the OS,
-which provides the AEE. Just as applications interface with an AEE
-via an ABI, RISC-V operating systems interface with a supervisor
-execution environment (SEE) via a supervisor binary interface (SBI).
-An SBI comprises the user-level and supervisor-level ISA together with
-a set of SBI function calls. Using a single SBI across all SEE
-implementations allows a single OS binary image to run on any SEE.
-The SEE can be a simple boot loader and BIOS-style IO system in a
-low-end hardware platform, or a hypervisor-provided virtual machine in
-a high-end server, or a thin translation layer over a host operating
-system in an architecture simulation environment.
-
-\begin{commentary}
-Most supervisor-level ISA definitions do not separate the SBI from the
-execution environment and/or the hardware platform, complicating
-virtualization and bring-up of new hardware platforms.
-\end{commentary}
-
-The rightmost configuration shows a virtual machine monitor
-configuration where multiple multiprogrammed OSs are supported by a
-single hypervisor. Each OS communicates via an SBI with the
-hypervisor, which provides the SEE. The hypervisor communicates with
-the hypervisor execution environment (HEE) using a hypervisor binary
-interface (HBI), to isolate the hypervisor from details of the
-hardware platform.
-
-\begin{commentary}
-The ABI, SBI, and HBI are still a work in progress, but we are now
-prioritizing support for Type-2 hypervisors where the SBI is provided
-recursively by an S-mode OS.
-\end{commentary}
-
-Hardware implementations of the RISC-V ISA will generally require
-additional features beyond the privileged ISA to support the various
-execution environments (AEE, SEE, or HEE).
-
-\section{Privilege Levels}
-
-At any time, a RISC-V hardware thread ({\em hart}) is running at some
-privilege level encoded as a mode in one or more CSRs (control and
-status registers). Three RISC-V privilege levels are currently defined
-as shown in Table~\ref{privlevels}.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|c|c|c|c|}
- \hline
- Level & Encoding & Name & Abbreviation \\ \hline
- 0 & \tt 00 & User/Application & U \\
- 1 & \tt 01 & Supervisor & S \\
- 2 & \tt 10 & {\em Reserved} & \\
- 3 & \tt 11 & Machine & M \\
- \hline
- \end{tabular}
-\end{center}
-\caption{RISC-V privilege levels.}
-\label{privlevels}
-\end{table*}
-
-Privilege levels are used to provide protection between different
-components of the software stack, and attempts to perform operations
-not permitted by the current privilege mode will cause an exception to
-be raised. These exceptions will normally cause traps into an
-underlying execution environment.
-
-\begin{commentary}
-In the description, we try to separate the privilege level for which
-code is written, from the privilege mode in which it runs, although
-the two are often tied. For example, a supervisor-level operating
-system can run in supervisor-mode on a system with three privilege
-modes, but can also run in user-mode under a classic virtual machine
-monitor on systems with two or more privilege modes. In both cases,
-the same supervisor-level operating system binary code can be used,
-coded to a supervisor-level SBI and hence expecting to be able to use
-supervisor-level privileged instructions and CSRs. When running a
-guest OS in user mode, all supervisor-level actions will be trapped
-and emulated by the SEE running in the higher-privilege level.
-\end{commentary}
-
-The machine level has the highest privileges and is the only mandatory
-privilege level for a RISC-V hardware platform. Code run in
-machine-mode (M-mode) is usually inherently trusted, as it has
-low-level access to the machine implementation. M-mode can be used to
-manage secure execution environments on RISC-V. User-mode (U-mode)
-and supervisor-mode (S-mode) are intended for conventional application
-and operating system usage respectively.
-
-Each privilege level has a core set of privileged ISA extensions with optional
-extensions and variants. For example, machine-mode supports an optional
-standard extension for memory protection. Also, supervisor mode can be
-extended to support Type-2 hypervisor execution as described in
-Chapter~\ref{hypervisor}.
-
-Implementations might provide anywhere from 1 to 3 privilege modes
-trading off reduced isolation for lower implementation cost, as shown
-in Table~\ref{privcombs}.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|c|l|l|}
- \hline
- Number of levels & Supported Modes & Intended Usage \\ \hline
- 1 & M & Simple embedded systems \\
- 2 & M, U & Secure embedded systems \\
- 3 & M, S, U & Systems running Unix-like operating systems\\
- \hline
- \end{tabular}
-\end{center}
-\caption{Supported combinations of privilege modes.}
-\label{privcombs}
-\end{table*}
-
-All hardware implementations must provide M-mode, as this is the only
-mode that has unfettered access to the whole machine. The simplest
-RISC-V implementations may provide only M-mode, though this will
-provide no protection against incorrect or malicious application code.
-
-\begin{commentary}
- The lock feature of the optional PMP facility can provide some
- limited protection even with only M-mode implemented.
-\end{commentary}
-
-Many RISC-V implementations will also support at least user mode
-(U-mode) to protect the rest of the system from application code.
-Supervisor mode (S-mode) can be added to provide isolation between a
-supervisor-level operating system and the SEE.
-
-A hart normally runs application code in U-mode until some trap (e.g.,
-a supervisor call or a timer interrupt) forces a switch to a trap
-handler, which usually runs in a more privileged mode. The hart will
-then execute the trap handler, which will eventually resume execution
-at or after the original trapped instruction in U-mode. Traps that
-increase privilege level are termed {\em vertical} traps, while traps
-that remain at the same privilege level are termed {\em horizontal}
-traps. The RISC-V privileged architecture provides flexible routing
-of traps to different privilege layers.
-
-\begin{commentary}
-Horizontal traps can be implemented as vertical traps that
-return control to a horizontal trap handler in the less-privileged mode.
-\end{commentary}
-
-\section{Debug Mode}
-
-Implementations may also include a debug mode to support off-chip
-debugging and/or manufacturing test. Debug mode (D-mode) can be
-considered an additional privilege mode, with even more access than
-M-mode. The separate debug specification proposal describes operation
-of a RISC-V hart in debug mode. Debug mode reserves a few CSR
-addresses that are only accessible in D-mode, and may also reserve
-some portions of the physical address space on a platform.
diff --git a/src/latex/priv-preface.tex b/src/latex/priv-preface.tex
deleted file mode 100644
index 4415570..0000000
--- a/src/latex/priv-preface.tex
+++ /dev/null
@@ -1,276 +0,0 @@
-\chapter{Preface}
-
-This document describes the RISC-V privileged architecture.
-This release, version \privrev, contains the following versions of the RISC-V
-ISA modules:
-
-{
-\begin{table}[hbt]
- \centering
- \begin{tabular}{|c|l|c|}
- \hline
- Module & Version & Status\\
- \hline
- \em Machine ISA & \em 1.13 & \em Draft \\
- \em Smrnmi Extension & \em 0.1 & \em Draft \\
- \bf Supervisor ISA & \bf 1.12 & \bf Ratified \\
- \bf Svnapot Extension & \bf 1.0 & \bf Ratified \\
- \bf Svpbmt Extension & \bf 1.0 & \bf Ratified \\
- \bf Svinval Extension & \bf 1.0 & \bf Ratified \\
- \bf Hypervisor ISA & \bf 1.0 & \bf Ratified \\
- \hline
- \end{tabular}
-\end{table}
-}
-
-The following compatible changes have been made to the Machine ISA since version 1.12:
-\vspace{-0.2in}
-\begin{itemize}
- \parskip 0pt
- \itemsep 1pt
-\item Defined the {\tt misa}.V field to reflect that the V extension has been implemented.
-\end{itemize}
-
-\section*{Preface to Version 20211203}
-
-This document describes the RISC-V privileged architecture.
-This release, version 20211203, contains the following versions of the RISC-V
-ISA modules:
-
-{
-\begin{table}[hbt]
- \centering
- \begin{tabular}{|c|l|c|}
- \hline
- Module & Version & Status\\
- \hline
- \bf Machine ISA & \bf 1.12 & \bf Ratified \\
- \bf Supervisor ISA & \bf 1.12 & \bf Ratified \\
- \bf Svnapot Extension & \bf 1.0 & \bf Ratified \\
- \bf Svpbmt Extension & \bf 1.0 & \bf Ratified \\
- \bf Svinval Extension & \bf 1.0 & \bf Ratified \\
- \bf Hypervisor ISA & \bf 1.0 & \bf Ratified \\
- \hline
- \end{tabular}
-\end{table}
-}
-
-The following changes have been made since version 1.11, which, while not
-strictly backwards compatible, are not anticipated to cause software
-portability problems in practice:
-\vspace{-0.2in}
-\begin{itemize}
- \parskip 0pt
- \itemsep 1pt
-\item Changed MRET and SRET to clear {\tt mstatus}.MPRV when leaving M-mode.
-\item Reserved additional {\tt satp} patterns for future use.
-\item Stated that the {\tt scause} Exception Code field must implement
- bits 4--0 at minimum.
-\item Relaxed I/O regions have been specified to follow RVWMO. The previous
- specification implied that PPO rules other than fences and acquire/release
- annotations did not apply.
-\item Constrained the LR/SC reservation set size and shape when using
- page-based virtual memory.
-\item PMP changes require an SFENCE.VMA on any hart that implements
- page-based virtual memory, even if VM is not currently enabled.
-\item Allowed for speculative updates of page table entry A bits.
-\item Clarified that if the address-translation algorithm non-speculatively
- reaches a PTE in which a bit reserved for future standard use is set,
- a page-fault exception must be raised.
-\end{itemize}
-
-Additionally, the following compatible changes have been made since version
-1.11:
-\vspace{-0.2in}
-\begin{itemize}
- \parskip 0pt
- \itemsep 1pt
-\item Removed the N extension.
-\item Defined the mandatory RV32-only CSR {\tt mstatush}, which contains
- most of the same fields as the upper 32 bits of RV64's {\tt mstatus}.
-\item Defined the mandatory CSR {\tt mconfigptr}, which if nonzero
- contains the address of a configuration data structure.
-\item Defined optional {\tt mseccfg} and {\tt mseccfgh} CSRs, which control
- the machine's security configuration.
-\item Defined {\tt menvcfg}, {\tt henvcfg}, and {\tt senvcfg} CSRs
- (and RV32-only {\tt menvcfgh} and {\tt henvcfgh} CSRs),
- which control various characteristics of the execution environment.
-\item Designated part of SYSTEM major opcode for custom use.
-\item Permitted the unconditional delegation of less-privileged interrupts.
-\item Added optional big-endian and bi-endian support.
-\item Made priority of load/store/AMO address-misaligned exceptions
- implementation-defined relative to load/store/AMO page-fault
- and access-fault exceptions.
-\item PMP reset values are now platform-defined.
-\item An additional 48 optional PMP registers have been defined.
-\item Slightly relaxed the atomicity requirement for A and D bit updates
- performed by the implementation.
-\item Clarified the architectural behavior of address-translation caches.
-\item Added Sv57 and Sv57x4 address translation modes.
-\item Software breakpoint exceptions are permitted to write either 0
- or the {\tt pc} to {\em x}\/{\tt tval}.
-\item Clarified that bare S-mode need not support the SFENCE.VMA instruction.
-\item Specified relaxed constraints for implicit reads of non-idempotent
- regions.
-\item Added the Svnapot Standard Extension, along with the N bit in
- Sv39, Sv48, and Sv57 PTEs.
-\item Added the Svpbmt Standard Extension, along with the PBMT bits in
- Sv39, Sv48, and Sv57 PTEs.
-\item Added the Svinval Standard Extension and associated instructions.
-\end{itemize}
-
-Finally, the hypervisor architecture proposal has been extensively revised.
-
-\newpage
-
-\section*{Preface to Version 1.11}
-
-This is version 1.11 of the RISC-V privileged architecture.
-The document contains the following versions of the RISC-V ISA
-modules:
-
-{
-\begin{table}[hbt]
- \centering
- \begin{tabular}{|c|l|c|}
- \hline
- Module & Version & Status\\
- \hline
- \bf Machine ISA & \bf 1.11 & \bf Ratified \\
- \bf Supervisor ISA & \bf 1.11 & \bf Ratified \\
- \em Hypervisor ISA & \em 0.3 & \em Draft \\
- \hline
- \end{tabular}
-\end{table}
-}
-
-Changes from version 1.10 include:
-\vspace{-0.2in}
-\begin{itemize}
- \parskip 0pt
- \itemsep 1pt
- \item Moved Machine and Supervisor spec to {\bf Ratified} status.
-\item Improvements to the description and commentary.
-\item Added a draft proposal for a hypervisor extension.
-\item Specified which interrupt sources are reserved for standard use.
-\item Allocated some synchronous exception causes for custom use.
-\item Specified the priority ordering of synchronous exceptions.
-\item Added specification that xRET instructions may, but are not
- required to, clear LR reservations if A extension present.
-\item The virtual-memory system no longer permits supervisor mode to execute
- instructions from user pages, regardless of the SUM setting.
-\item Clarified that ASIDs are private to a hart, and added commentary about
- the possibility of a future global-ASID extension.
-\item SFENCE.VMA semantics have been clarified.
-\item Made the {\tt mstatus}.MPP field \warl, rather than \wlrl.
-\item Made the unused {\em x}{\tt ip} fields \wpri, rather than \wiri.
-\item Made the unused {\tt misa} fields \warl, rather than \wiri.
-\item Made the unused {\tt pmpaddr} and {\tt pmpcfg} fields \warl, rather than \wiri.
-\item Required all harts in a system to employ the same PTE-update scheme as each other.
-\item Rectified an editing error that misdescribed the mechanism by which
- {\tt mstatus}.{\em x}IE is written upon an exception.
-\item Described scheme for emulating misaligned AMOs.
-\item Specified the behavior of the {\tt misa} and {\em x}{\tt epc} registers in
- systems with variable IALIGN.
-\item Specified the behavior of writing self-contradictory values to the
- {\tt misa} register.
-\item Defined the {\tt mcountinhibit} CSR, which stops performance
- counters from incrementing to reduce energy consumption.
-\item Specified semantics for PMP regions coarser than four bytes.
-\item Specified contents of CSRs across XLEN modification.
-\item Moved PLIC chapter into its own document.
-\end{itemize}
-
-\newpage
-
-\section*{Preface to Version 1.10}
-
-This is version 1.10 of the RISC-V privileged
-architecture proposal. Changes from version 1.9.1 include:
-
-\begin{itemize}
- \parskip 0pt
- \itemsep 1pt
-\item The previous version of this document was released under a
- Creative Commons Attribution 4.0 International License by the
- original authors, and this and future versions of this document will
- be released under the same license.
-\item The explicit convention on shadow CSR addresses has been removed
- to reclaim CSR space. Shadow CSRs can still be added as needed.
-\item The {\tt mvendorid} register now contains the JEDEC code of the
- core provider as opposed to a code supplied by the Foundation. This
- avoids redundancy and offloads work from the Foundation.
-\item The interrupt-enable stack discipline has been simplified.
-\item An optional mechanism to change the base ISA used by supervisor
- and user modes has been added to the {\tt mstatus} CSR, and the
- field previously called Base in {\tt misa} has been renamed to {\tt
- MXL} for consistency.
-\item Clarified expected use of XS to summarize additional extension
- state status fields in {\tt mstatus}.
-\item Optional vectored interrupt support has been added to the
- {\tt mtvec} and {\tt stvec} CSRs.
-\item The SEIP and UEIP bits in the {\tt mip} CSR have been redefined
- to support software injection of external interrupts.
- \item The {\tt mbadaddr} register has been subsumed by a more
- general {\tt mtval} register that can now capture bad
- instruction bits on an illegal instruction fault to speed
- instruction emulation.
-\item The machine-mode base-and-bounds translation and protection
- schemes have been removed from the specification as part of moving
- the virtual memory configuration to {\tt sptbr} (now {\tt satp}). Some of the
- motivation for the base-and-bounds schemes is now covered by the PMP
- registers, but space remains available in {\tt mstatus} to add these
- back at a later date if deemed useful.
-\item In systems with only M-mode, or with both M-mode and U-mode but
- without U-mode trap support, the {\tt medeleg} and {\tt mideleg}
- registers now do not exist, whereas previously they returned zero.
-\item Virtual-memory page faults now have {\tt mcause} values distinct from
- physical-memory access faults. Page-fault exceptions can now be
- delegated to S-mode without delegating exceptions generated by PMA
- and PMP checks.
-\item An optional physical-memory protection (PMP) scheme has been proposed.
-\item The supervisor virtual memory configuration has been moved from the
- {\tt mstatus} register to the {\tt sptbr} register. Accordingly, the
- {\tt sptbr} register has been renamed to {\tt satp} (Supervisor Address
- Translation and Protection) to reflect its broadened role.
-\item The SFENCE.VM instruction has been removed in favor of the improved
- SFENCE.VMA instruction.
-\item The {\tt mstatus} bit MXR has been exposed to S-mode via {\tt sstatus}.
-\item The polarity of the PUM bit in {\tt sstatus} has been inverted to
- shorten code sequences involving MXR. The bit has been renamed to SUM.
-\item Hardware management of page-table entry Accessed and Dirty bits has
- been made optional; simpler implementations may trap to software to
- set them.
-\item The counter-enable scheme has changed, so that S-mode can
- control availability of counters to U-mode.
-\item H-mode has been removed, as we are focusing on recursive
- virtualization support in S-mode. The encoding space has been
- reserved and may be repurposed at a later date.
-\item A mechanism to improve virtualization performance by
- trapping S-mode virtual-memory management operations has been added.
-\item The Supervisor Binary Interface (SBI) chapter has been removed, so
- that it can be maintained as a separate specification.
-\end{itemize}
-
-\newpage
-
-\section*{Preface to Version 1.9.1}
-
-This is version 1.9.1 of the RISC-V privileged architecture
-proposal. Changes from version 1.9 include:
-
-\begin{itemize}
- \parskip 0pt
- \itemsep 1pt
-\item Numerous additions and improvements to the commentary sections.
-\item Changed configuration string proposal to use a search process
- that supports various formats including Device Tree String and
- flattened Device Tree.
-\item Made {\tt misa} optionally writable to support modifying base
- and supported ISA extensions. CSR address of {\tt misa} changed.
-\item Added description of debug mode and debug CSRs.
-\item Added a hardware performance monitoring scheme. Simplified the
- handling of existing hardware counters, removing privileged versions
- of the counters and the corresponding delta registers.
-\item Fixed description of SPIE in presence of user-level interrupts.
-\end{itemize}
\ No newline at end of file
diff --git a/src/latex/riscv-privileged.tex b/src/latex/riscv-privileged.tex
deleted file mode 100644
index 23b491d..0000000
--- a/src/latex/riscv-privileged.tex
+++ /dev/null
@@ -1,89 +0,0 @@
-%=======================================================================
-% riscv-privileged.tex
-%-----------------------------------------------------------------------
-
-\documentclass[twoside,11pt]{book}
-
-% Fix copy/pasting of ligatures in Acrobat
-\input{glyphtounicode.tex}
-\pdfgentounicode=1 %
-
-\input{preamble}
-
-\newcommand{\privrev}{20211203}
-\newcommand{\privmonthyear}{December 2021}
-
-\setcounter{secnumdepth}{3}
-\setcounter{tocdepth}{3}
-
-\begin{document}
-
-\title{\vspace{-0.7in}\Large {\bf The RISC-V Instruction Set Manual} \\
- \large {\bf Volume II: Privileged Architecture} \\
- Document Version \privrev
- \vspace{-0.1in}}
-
-\author{Editors: Andrew Waterman$^{1}$, Krste Asanovi\'{c}$^{1,2}$, John Hauser \\
- $^{1}$SiFive Inc., \\
- $^{2}$CS Division, EECS Department, University of California, Berkeley \\
- {\tt waterman@eecs.berkeley.edu, krste@berkeley.edu, jh.riscv@jhauser.us} \\
- \today
-}
-
-\date{}
-\maketitle
-
-Contributors to all versions of the spec in alphabetical order (please contact
-editors to suggest corrections): Krste Asanovi\'{c}, Peter Ashenden, Rimas
-Avi\v{z}ienis, Jacob Bachmeyer, Allen J. Baum, Jonathan Behrens, Paolo Bonzini, Ruslan Bukin,
-Christopher Celio, Chuanhua Chang, David Chisnall, Anthony Coulter, Palmer Dabbelt, Monte
-Dalrymple, Paul Donahue, Greg Favor, Dennis Ferguson, Marc Gauthier, Andy Glew,
-Gary Guo, Mike Frysinger, John Hauser, David Horner, Olof
-Johansson, David Kruckemyer, Yunsup Lee, Daniel Lustig, Andrew Lutomirski, Prashanth Mundkur,
-Jonathan Neusch{\"a}fer, Rishiyur
-Nikhil, Stefan O'Rear, Albert Ou, John Ousterhout, David Patterson, Dmitri
-Pavlov, Kade Phillips, Josh Scheid, Colin Schmidt, Michael Taylor, Wesley Terpstra, Matt Thomas, Tommy Thorn, Ray
-VanDeWalker, Megan Wachs, Steve Wallach, Andrew Waterman, Claire Wolf,
-and Reinoud Zandijk.
-
-This document is released under a Creative Commons Attribution 4.0
-International License.
-
-This document is a derivative of the RISC-V
-privileged specification version 1.9.1 released under the following license:
-\copyright \,2010--2017 Andrew Waterman, Yunsup Lee, Rimas
-Avi\v{z}ienis, David Patterson, Krste Asanovi\'{c}.
-Creative Commons Attribution 4.0 International License.
-
-Please cite as: ``The RISC-V Instruction Set
-Manual, Volume II: Privileged Architecture, Document Version \privrev'', Editors
-Andrew Waterman, Krste Asanovi\'{c}, and John Hauser, RISC-V International, \privmonthyear.
-
-\markboth{Volume II: RISC-V Privileged Architectures V\privrev}
-{Volume II: RISC-V Privileged Architectures V\privrev}
-\thispagestyle{empty}
-
-\frontmatter
-
-\input{priv-preface}
-
-{\hypersetup{linktoc=all,hidelinks}
-\tableofcontents
-}
-
-\mainmatter
-
-\input{priv-intro}
-\input{priv-csrs}
-\input{machine}
-\input{rnmi}
-\input{supervisor}
-\input{hypervisor}
-\input{priv-insns}
-
-\input{priv-history}
-
-\bibliographystyle{plain}
-\bibliography{riscv-spec}
-
-\end{document}
diff --git a/src/latex/riscv-spec.bib b/src/latex/riscv-spec.bib
deleted file mode 100644
index 3d7157f..0000000
--- a/src/latex/riscv-spec.bib
+++ /dev/null
@@ -1,513 +0,0 @@
-@Misc{ieee754-2008,
- key = "{IEEE}",
- title = "{ANSI/IEEE Std 754-2008}, {IEEE} standard for
- floating-point arithmetic",
- publisher = {Institute of Electrical and Electronics Engineers},
- year = 2008
-}
-
-@inproceedings{riscI-isca1981,
- title = {{RISC I}: {A} Reduced Instruction Set {VLSI} Computer},
- author = {David A. Patterson and Carlo H. S\'{e}quin},
- booktitle = {ISCA},
- location = {Minneapolis, Minnesota, USA},
- pages = {443-458},
- year = {1981}
-}
-
-@InProceedings{Katevenis:1983,
- author = {Katevenis, Manolis G.H. and Sherburne,Jr., Robert W. and Patterson, David A. and S{\'e}quin, Carlo H.},
- title = {The {RISC II} micro-architecture},
- booktitle = {Proceedings VLSI 83 Conference},
- year = 1983,
- month = {August}}
-
-@article{Katevenis:1984,
- author = {Katevenis, Manolis G.H. and Sherburne,Jr., Robert W. and Patterson, David A. and S{\'e}quin, Carlo H.},
- title = {The {RISC II} micro-architecture},
- journal = {Advances in VLSI and Computer Systems},
- issue_date = {Fall 1984},
- volume = {1},
- number = {2},
- month = October,
- year = {1984},
- pages = {138--152},
- publisher = {Computer Science Press, Inc.},
- address = {New York, NY, USA},
-}
-
-@inproceedings{Ungar:1984,
- author = {David Ungar and Ricki Blau and Peter Foley and Dain Samples
- and David Patterson},
- title = {Architecture of {SOAR}: {Smalltalk} on a {RISC}},
- booktitle = {ISCA},
- address = {Ann Arbor, MI},
- year = {1984},
- pages = {188--197}
-}
-
-@Article{spur-jsscc1989,
- author = {David D. Lee and Shing I. Kong and Mark D. Hill and
- George S. Taylor and David A. Hodges and Randy
- H. Katz and David A. Patterson},
- title = {A {VLSI} Chip Set for a Multiprocessor
- Workstation--{Part I}: An {RISC} Microprocessor with
- Coprocessor Interface and Support for Symbolic
- Processing},
- journal = {IEEE JSSC},
- year = 1989,
- volume = 24,
- number = 6,
- pages = {1688--1698},
- month = {December}}
-
-@MastersThesis{waterman-ms,
- author = {Andrew Waterman},
- title = {{Improving Energy Efficiency and Reducing Code Size with RISC-V Compressed}},
- school = {University of California, Berkeley},
- year = 2011,
- Number = {UCB/EECS-2011-63},
-}
-
-@phdthesis{waterman-phd,
- Author = {Waterman, Andrew},
- Title = {Design of the {RISC-V} Instruction Set Architecture},
- School = {University of California, Berkeley},
- Year = {2016},
- Number = {UCB/EECS-2016-1},
-}
-
-@TechReport{riscvtr,
- author = {Andrew Waterman and Yunsup Lee and David A. Patterson and Krste Asanovi\'{c}},
- title = {The {RISC-V} Instruction Set Manual, {Volume I}: {Base}
- User-Level {ISA}},
- institution = {EECS Department, University of California, Berkeley},
- year = 2011,
- number = {UCB/EECS-2011-62},
- month = {May}}
-
-
-
-@Book{kane:mips:1991,
- author = {G. Kane and J. Heinrich},
- title = {MIPS RISC Architecture},
- publisher = {Prentice Hall},
- month = {September},
- year = 1991,
- note = {ISBN 0135904722},
- edition = {2nd}
-}
-
-@book{patterson:undergrad:2008,
- author = {D. A. Patterson and J. L. Hennessy},
- title = {Computer Organization and Design: The
- Hardware/Software Interface},
- edition = {4th},
- publisher = {Morgan Kaufmann},
- month = {November},
- year = {2008},
- note = {ISBN 0123744938}
-}
-
-@Book{sweetman:mips:2006,
- author = {D. Sweetman},
- title = {See {MIPS} Run},
- edition = {2nd},
- publisher = {Morgan Kaufmann},
- year = {2006},
- month = {October},
- note = {ISBN 0120884216}
-}
-
-@Misc{mips:arch:2010,
- author = {MIPS Technologies Inc.},
- title = {{MIPS32} Architecture for Programmers},
- year = {2010},
- note = {\verb!https://www.imgtec.com/mips/architectures/mips32/!}
-}
-
-@Misc{sgi:mipspro:1997,
- author = {Silicon Graphics Inc.},
- title = {{MIPSpro} 64-{B}it Porting and Translation Guide},
- year = {1997},
- note = {\verb!http://techpubs.sgi.com/!}
-}
-
-@Misc{openriscarch,
- author = {OpenCores},
- title = {{OpenRISC} 1000 Architecture Manual, Architecture
- Version 1.0},
- month = {December},
- year = 2012}
-
-@ARTICLE{tremblay-vis-ieeemicro1996,
-author={Tremblay, M. and O'Connor, J.M. and Narayanan, V. and Liang He},
-journal={IEEE Micro},
-title={{VIS} speeds new media processing},
-year={1996},
-month=AUG,
-volume={16},
-number={4},
-pages={10 -20},
-keywords={3D graphics environments;RISC-style instructions;UltraSparc;VIS;Visual Instruction Set;media processing;media-processing algorithms;computer graphics;instruction sets;reduced instruction set computing;},
-ISSN={0272-1732},}
-
-@ARTICLE{lee-max-ieeemicro1996,
-author={Lee, R.B.},
-journal={IEEE Micro},
-title={Subword parallelism with {MAX-2}},
-year={1996},
-month=AUG,
-volume={16},
-number={4},
-pages={51 -59},
-keywords={MAX-2;instruction extensions;media processing;parallel computation;subword parallelism;word-oriented general-purpose processor;instruction sets;multimedia computing;parallel processing;},
-ISSN={0272-1732},}
-
-@ARTICLE{peleg-mmx-ieeemicro1996,
-author={Peleg, A. and Weiser, U.},
-journal={IEEE Micro},
-title={{MMX} technology extension to the {Intel} architecture},
-year={1996},
-month=AUG,
-volume={16},
-number={4},
-pages={42 -50},
-keywords={Intel architecture;MMX;SIMD;communications;compatibility;multimedia;operating systems;microprocessor chips;parallel architectures;},
-ISSN={0272-1732},}
-
-@ARTICLE{raman-sse-ieeemicro2000,
-author={Raman, S.K. and Pentkovski, V. and Keshava, J.},
-journal={IEEE Micro},
-title={Implementing streaming {SIMD} extensions on the {Pentium}-{III} processor },
-year={2000},
-month=JUL/AUG,
-volume={20},
-number={4},
-pages={47 -57},
-keywords={Internet;Pentium III developers;demanding multimedia;die size constraints;streaming SIMD extensions;instruction sets;microprocessor chips;},
-ISSN={0272-1732},}
-
-@misc{lomont-avx-irm2011,
-author={Chris Lomont},
-title = {Introduction to {Intel Advanced Vector Extensions}},
-howpublished = {Intel White Paper},
-year = {2011},
-}
-
-@ARTICLE{goodacre-armisa-computer2005,
-author={Goodacre, J. and Sloss, A.N.},
-journal={Computer},
-title={Parallelism and the {ARM} instruction set architecture},
-year={2005},
-month=JULY,
-volume={38},
-number={7},
-pages={ 42 - 50},
-keywords={ ARM RISC processor; ARM chip design; ARM instruction set architecture; digital signal processor-like operations; exception handling; multiprocessing; reduced-instruction-set computing; subword parallelism; thread-level parallelism; variable execution time; instruction sets; microprocessor chips; parallel architectures; parallel programming; reduced instruction set computing;},
-ISSN={0018-9162},}
-
-@ARTICLE{diefendorff-altivec-ieeemicro2000,
-author={Diefendorff, K. and Dubey, P.K. and Hochsprung, R. and Scale, H.},
-journal={IEEE Micro},
-title={{AltiVec} extension to {PowerPC} accelerates media processing},
-year={2000},
-month=MAR/APR,
-volume={20},
-number={2},
-pages={85 -95},
-keywords={2D image processing;3D graphics;AltiVec extension;Apple G4;Hewlett-Packard added MAX;MDMX;MIPS architecture;MMX;Motorola's MPC 7400;PA-RISC architecture;PowerPC;PowerPC's AltiVec;SSE;Silicon Graphics;Sun enhanced Sparc;alias KNI;handwriting recognition;media mining;media processing;multimedia technologies;narrow/broadband signal processing;personal computing;digital signal processing chips;handwriting recognition;multimedia systems;parallel architectures;},
-ISSN={0272-1732},}
-
-@misc{gwennap-mdmx-mpr1996,
-author={Linley Gwennap},
-title={Digital, {MIPS} Add Multimedia Extensions},
-howpublished = {Microprocessor Report},
-year = {1996},
-}
-@article{majc,
- author = {Tremblay, Marc and Chan, Jeffrey and Chaudhry, Shailender and Conigliaro, Andrew W. and Tse, Shing Sheung},
- title = {The {MAJC} Architecture: {A} Synthesis of Parallelism and Scalability},
- journal = {IEEE Micro},
- issue_date = {November 2000},
- volume = {20},
- number = {6},
- month = November,
- year = {2000},
- pages = {12--25},
- publisher = {IEEE Computer Society Press},
- address = {Los Alamitos, CA, USA},
-}
-
-@InProceedings{tx2,
- author = {John M. Frankovich and H. Philip Peterson},
- title = {A functional description of the {Lincoln} {TX-2} computer},
- booktitle = {Western Joint Computer Conference},
- year = 1957,
- address = {Los Angeles, CA},
- month = {February}
-}
-
-
-@TechReport{heil-tr1996,
- author = {Timothy H. Heil and James E. Smith},
- title = {Selective Dual Path Execution},
- institution = {University of Wisconsin - Madison},
- year = 1996,
- month = {November}}
-
-@inproceedings{Klauser-1998,
- author = {Klauser, A. and Austin, T. and Grunwald, D. and Calder, B.},
- title = {Dynamic Hammock Predication for Non-Predicated Instruction Set Architectures},
- booktitle = {Proceedings of the 1998 International Conference on Parallel Architectures and Compilation Techniques},
- series = {PACT '98},
- year = {1998},
- address = {Washington, DC, USA},
-}
-
-@inproceedings{Kim-micro2005,
- author = {Kim, Hyesoon and Mutlu, Onur and Stark, Jared and Patt, Yale N.},
- title = {Wish Branches: Combining Conditional Branching and Predication for Adaptive Predicated Execution},
- booktitle = {Proceedings of the 38th annual IEEE/ACM International Symposium on Microarchitecture},
- series = {MICRO 38},
- year = {2005},
- location = {Barcelona, Spain},
- pages = {43--54},
-}
-
-@INPROCEEDINGS{Gharachorloo90memoryconsistency,
- author = {Kourosh Gharachorloo and Daniel Lenoski and James Laudon
- and Phillip Gibbons and Anoop Gupta and John
- Hennessy},
- title = {Memory Consistency and Event Ordering in Scalable
- Shared-Memory Multiprocessors},
- booktitle = {In Proceedings of the 17th Annual International
- Symposium on Computer Architecture},
- year = {1990},
- pages = {15--26}
-}
-
-
-@inproceedings{Rajwar:2001:SLE,
- author = {Rajwar, Ravi and Goodman, James R.},
- title = {Speculative lock elision: enabling highly concurrent multithreaded execution},
- booktitle = {Proceedings of the 34th annual ACM/IEEE International Symposium on Microarchitecture},
- series = {MICRO 34},
- year = {2001},
- location = {Austin, Texas},
- pages = {294--305},
- publisher = {IEEE Computer Society},
-}
-
-@Misc{sparcieee1994,
- title = {{IEEE} Standard for a 32-bit microprocessor},
- howpublished = {IEEE Std. 1754-1994},
- year = 1994}
-
-
-@Book{parisckane1995,
- author = {Gerry Kane},
- title = {PA-RISC 2.0 Architecture},
- publisher = {Prentice Hall},
- year = 1995,
- month = {December},
- note = {ISBN 978-0131827349}}
-
-@article{ibmpower7,
- title={{IBM} {POWER7} multicore server processor},
- author={Sinharoy, Balaram and Kalla, R. and Starke, W. J. and Le,
- H. Q. and Cargnoni, R. and Van Norstrand, J. A. and
- Ronchetti, B. J. and Stuecheli, J. and Leenstra,
- J. and Guthrie, G. L. and Nguyen, D. Q. and Blaner,
- B. and Marino, C. F. and Retter, E. and Williams, P.},
- journal={IBM Journal of Research and Development},
- volume={55},
- number={3},
- pages={1--1},
- year={2011},
- publisher={IBM}
-}
-
-@article{virtio,
- author = {Russell, Rusty},
- title = {Virtio: {Towards} a De-facto Standard for Virtual {I/O} Devices},
- journal = {SIGOPS Oper. Syst. Rev.},
- issue_date = {July 2008},
- volume = {42},
- number = {5},
- month = jul,
- year = {2008},
- issn = {0163-5980},
- pages = {95--103},
- numpages = {9},
- publisher = {ACM},
- address = {New York, NY, USA},
-}
-
-@ARTICLE{goldbergvm,
-author={Goldberg, Robert P.},
-journal={Computer},
-title={Survey of virtual machine research},
-year={1974},
-month={June},
-volume={7},
-number={6},
-pages={34-45}
-}
-
-@Manual{alphapalcode,
- title = {{PALcode} for {Alpha} microprocessors: System Design
- Guide},
- organization = {Digital Equipment Corporation},
- address = {Maynard, Massachusetts},
- note = {EC-QFGLC-TE},
- month = {May},
- year = 1996}
-
-@article{transparent-superpages,
- author = {Navarro, Juan and Iyer, Sitaram and Druschel, Peter and Cox, Alan},
- title = {Practical, Transparent Operating System Support for Superpages},
- journal = {SIGOPS Oper. Syst. Rev.},
- issue_date = {Winter 2002},
- volume = {36},
- number = {SI},
- month = dec,
- year = {2002},
- issn = {0163-5980},
- pages = {89--104},
- numpages = {16},
- url = {https://doi.org/10.1145/844128.844138},
- doi = {10.1145/844128.844138},
- acmid = {844138},
- publisher = {ACM},
- address = {New York, NY, USA},
-}
-
-@Book{stretch,
- editor = {Werner Buchholz},
- title = {Planning a computer system: {Project} {Stretch}},
- publisher = {McGraw-Hill Book Company},
- year = 1962}
-
-@Article{ibm360,
- author = {G. M. Amdahl and G. A. Blaauw and F. P. Brooks, Jr.},
- title = {Architecture of the {IBM} {System/360}},
- journal = {IBM Journal of R. \& D.},
- year = 1964,
- volume = 8,
- number = 2
-}
-
-@inproceedings{cdc6600,
- author = {Thornton, James E.},
- title = {Parallel Operation in the {Control Data 6600}},
- booktitle = {Proceedings of the October 27-29, 1964, Fall Joint Computer Conference, Part II: Very High Speed Computer Systems},
- series = {AFIPS '64 (Fall, part II)},
- year = {1965},
- location = {San Francisco, California},
- pages = {33--40}
-}
-
-@InProceedings{jtseng:sbbci,
- author = {J. Tseng and K. Asanovi\'c},
- title = {Energy-Efficient Register Access},
- booktitle = {Proc. of the 13th Symposium on Integrated Circuits and
- Systems Design},
- address = {Manaus, Brazil},
- month = {September},
- year = 2000,
- pages = "377--384"
-}
-
-@TechReport{riscvtr2,
- author = {Andrew Waterman and Yunsup Lee and David A. Patterson and Krste Asanovi\'{c}},
- title = {The {RISC-V} Instruction Set Manual, {Volume I}: {Base}
- User-Level {ISA} Version 2.0},
- institution = {EECS Department, University of California, Berkeley},
- year = 2014,
- number = {UCB/EECS-2014-54},
- month = {May}}
-
-@Article{ibm370varch,
- author = {W. Buchholz},
- title = "{The IBM System/370 vector architecture}",
- journal = {IBM Systems Journal},
- year = 1986,
- volume = 25,
- number = 1,
- pages = {51--62}
-}
-
-@PhdThesis{krstephd,
- author = {Krste Asanovi\'c},
- title = {Vector Microprocessors},
- school = {University of California at Berkeley},
- year = 1998,
- month = {May},
- note = {Available as techreport UCB/CSD-98-1014}
-}
-
-@InProceedings{vp200,
- author = "Kenichi Miura and Keiichiro Uchida",
- title = "{FACOM Vector Processor System: VP-100/VP-200}",
- editor = "Kawalik",
- volume = "F7",
- booktitle = "Proceedings of NATO Advanced Research Workshop on
- High Speed Computing",
- year = 1984,
- publisher = "Springer-Verlag",
- note = "Also in: IEEE Tutorial Supercomputers: Design and
- Applications. Kai Hwang(editor), pp59-73"
-}
-@Manual{crayx1asm,
- title = {Cray Assembly Language {(CAL)} for {Cray} {X1} Systems Reference Manual},
- organization = {Cray Inc.},
- edition = {1.1},
- month = {June},
- year = 2003
-}
-
-@misc{riscv-elf-psabi,
- title = {{RISC-V ELF psABI Specification}},
- howpublished = {\url{https://github.com/riscv/riscv-elf-psabi-doc/}}
-}
-
-@misc{riscv-asm-manual,
- title = {{RISC-V Assembly Programmer's Manual}},
- howpublished = {\url{https://github.com/riscv/riscv-asm-manual}}
-}
-
-@inproceedings{lithe-pan-hotpar09,
-author = {Heidi Pan and Benjamin Hindman and Krste Asanovi\'c},
-title = {{Lithe}: Enabling Efficient Composition of Parallel Libraries},
-booktitle = {Proceedings of the 1st USENIX Workshop on Hot Topics in Parallelism (HotPar~'09)},
-month = {March},
-year = {2009},
-address = {Berkeley, CA}}
-
-
-@inproceedings{lithe-pan-pldi10,
-author = {Heidi Pan and Benjamin Hindman and Krste Asanovi\'c},
-title = {Composing Parallel Software Efficiently with {Lithe}},
-booktitle = {31st Conference on Programming Language Design and Implementation},
-month = {June},
-year = {2010},
-address = {Toronto, Canada}}
-
-@article{roux:hal-01091186,
- TITLE = {{Innocuous Double Rounding of Basic Arithmetic Operations}},
- AUTHOR = {Roux, Pierre},
- URL = {https://hal.archives-ouvertes.fr/hal-01091186},
- JOURNAL = {{Journal of Formalized Reasoning}},
- PUBLISHER = {{ASDD-AlmaDL}},
- VOLUME = {7},
- NUMBER = {1},
- PAGES = {131-142},
- YEAR = {2014},
- MONTH = Nov,
- DOI = {10.6092/issn.1972-5787/4359},
- KEYWORDS = {Coq ; double rounding ; floating-point arithmetic},
- PDF = {https://hal.archives-ouvertes.fr/hal-01091186/file/submission.pdf},
- HAL_ID = {hal-01091186},
- HAL_VERSION = {v1},
-}
diff --git a/src/latex/rnmi.tex b/src/latex/rnmi.tex
deleted file mode 100644
index d5821de..0000000
--- a/src/latex/rnmi.tex
+++ /dev/null
@@ -1,234 +0,0 @@
-\chapter{``Smrnmi'' Standard Extension for Resumable Non-Maskable Interrupts, Version 0.4}
-\label{chap:rnmi}
-
-{\bf Warning! This draft specification may change before being
-accepted as standard by RISC-V International.}
-
-The base machine-level architecture supports only
-unresumable non-maskable interrupts (UNMIs), where the NMI jumps to a
-handler in machine mode, overwriting the current {\tt mepc} and {\tt mcause}
-register values. If the hart had been executing machine-mode code in
-a trap handler, the previous values in {\tt mepc} and {\tt mcause} would not
-be recoverable and so execution is not generally resumable.
-
-The Smrnmi extension adds support for resumable non-maskable interrupts
-(RNMIs) to RISC-V. The extension adds four new CSRs ({\tt mnepc},
-{\tt mncause}, {\tt mnstatus}, and {\tt mnscratch}) to hold the interrupted state,
-and one new instruction, MNRET, to resume from the RNMI handler.
-
-\section{RNMI Interrupt Signals}
-
-The {\tt rnmi} interrupt signals are inputs to
-the hart. These interrupts have higher priority than any other
-interrupt or exception on the hart and cannot be disabled by software.
-Specifically, they are not disabled by clearing the {\tt mstatus}.MIE
-bit.
-
-\section{RNMI Handler Addresses}
-
-The RNMI interrupt trap handler address is implementation-defined.
-
-RNMI also has an associated exception trap handler address, which is
-implementation-defined.
-
-\section{RNMI CSRs}
-
-This proposal adds additional M-mode CSRs to enable a resumable
-non-maskable interrupt (RNMI).
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{J}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt mnscratch} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Resumable NMI scratch register {\tt mnscratch}.}
-\label{fig:mnscratch}
-\end{figure*}
-
-The {\tt mnscratch} CSR holds an MXLEN-bit read-write register which
-enables the NMI trap handler to save and restore the context that was
-interrupted.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{J}
-\instbitrange{MXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{{\tt mnepc} (\warl)} \\
-\hline
-MXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Resumable NMI program counter {\tt mnepc}.}
-\label{fig:mnepc}
-\end{figure*}
-
-The {\tt mnepc} CSR is an MXLEN-bit read-write register which on entry
-to the NMI trap handler holds the PC of the instruction that took the
-interrupt.
-
-The low bit of {\tt mnepc} ({\tt mnepc[0]}) is
-always zero. On implementations that support only IALIGN=32, the two low bits
-({\tt mnepc[1:0]}) are always zero.
-
-If an implementation allows IALIGN to be either 16 or 32 (by
-changing CSR {\tt misa}, for example), then, whenever IALIGN=32, bit
-{\tt mnepc[1]} is masked on reads so that it appears to be 0. This
-masking occurs also for the implicit read by the MNRET instruction.
-Though masked, {\tt mnepc[1]} remains writable when IALIGN=32.
-
-{\tt mnepc} is a \warl\ register that must be able to hold all valid
-virtual addresses. It need not be capable of holding all possible invalid
-addresses.
-Prior to writing {\tt mnepc}, implementations may convert an invalid address
-into some other invalid address that {\tt mnepc} is capable of holding.
-
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{cU}
-\instbit{MXLEN-1} &
-\instbitrange{MXLEN-2}{0} \\
-\hline
-\multicolumn{1}{|c|}{1} &
-\multicolumn{1}{c|}{NMI Cause (\warl)} \\
-\hline
-1 & MXLEN-1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Resumable NMI cause {\tt mncause}.}
-\label{fig:mncause}
-\end{figure*}
-
-The {\tt mncause} CSR holds the reason for the NMI, with bit MXLEN-1 set to
-1, and the NMI cause encoded in the least-significant bits or zero if
-NMI causes are not supported.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{TRFcFcF}
-\instbitrange{MXLEN-1}{13} &
-\instbitrange{12}{11} &
-\instbitrange{10}{8} &
-\instbit{7} &
-\instbitrange{6}{4} &
-\instbit{3} &
-\instbitrange{2}{0} \\
-\hline
-\multicolumn{1}{|c|}{\em Reserved} &
-\multicolumn{1}{c|}{MNPP (\warl)} &
-\multicolumn{1}{c|}{\em Reserved} &
-\multicolumn{1}{c|}{MNPV (\warl)} &
-\multicolumn{1}{c|}{\em Reserved} &
-\multicolumn{1}{c|}{NMIE} &
-\multicolumn{1}{c|}{\em Reserved} \\
-\hline
-MXLEN-13 & 2 & 3 & 1 & 3 & 1 & 3 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Resumable NMI status register {\tt mnstatus}.}
-\label{fig:mnstatus}
-\end{figure*}
-
-The {\tt mnstatus} CSR holds a two-bit field, MNPP, which on entry to the trap
-handler holds the privilege mode of the interrupted context, encoded
-in the same manner as {\tt mstatus}.MPP.
-It also holds a one-bit field, MNPV, which on entry to the trap handler holds
-the virtualization mode of the interrupted context, encoded in the same
-manner as {\tt mstatus}.MPV.
-
-{\tt mnstatus} also holds the NMIE bit.
-When NMIE=1, non-maskable interrupts are enabled.
-When NMIE=0, {\em all} interrupts are disabled.
-
-When NMIE=0, the hart behaves as though {\tt mstatus}.MPRV were clear,
-regardless of the current setting of {\tt mstatus}.MPRV.
-
-Upon reset, NMIE contains the value 0.
-
-\begin{commentary}
-RNMIs are masked out of reset to give software the opportunity to initialize
-data structures and devices for subsequent RNMI handling.
-\end{commentary}
-
-Software can set NMIE to 1, but attempts to clear NMIE have no effect.
-
-\begin{commentary}
-Normally, only reset sequences will explicitly set the NMIE bit.
-\end{commentary}
-\begin{commentary}
-That the NMIE bit is settable does not suffice to support the nesting of
-RNMIs.
-To support this feature in a direct manner would have required allowing
-software to clear the NMIE bit---a design choice that would have contravened
-the concept of non-maskability.
-
-Software that wishes to minimize the latency until the next RNMI is taken can
-follow the top-half/bottom-half model, where the RNMI handler itself only
-enqueues a task to a task queue then returns.
-The bulk of the interrupt servicing is performed later, with RNMIs enabled.
-\end{commentary}
-
-For the purposes of the WFI instruction, NMIE is a global interrupt enable,
-meaning that the setting of NMIE does not affect the operation of the WFI
-instruction.
-
-The other
-bits in {\tt mnstatus} are {\em reserved}; software should write zeros and
-hardware implementations should return zeros.
-
-\section{MNRET Instruction}
-
-MNRET is an M-mode-only instruction that uses the values in {\tt mnepc} and
-{\tt mnstatus} to return to the program counter, privilege mode,
-and virtualization mode of the interrupted context.
-This instruction also sets {\tt mnstatus}.NMIE.
-
-\section{RNMI Operation}
-
-When an RNMI interrupt is detected, the interrupted PC is written to
-the {\tt mnepc} CSR, the type of RNMI to the {\tt mncause} CSR, and the
-privilege mode of the interrupted context to the {\tt mnstatus} CSR.
-The {\tt mnstatus}.NMIE bit is cleared, masking all interrupts.
-
-The hart then enters machine-mode and jumps to the RNMI trap handler
-address.
-
-The RNMI handler can resume original execution using the new MNRET
-instruction, which restores the PC from {\tt mnepc}, the privilege mode
-from {\tt mnstatus}, and also sets {\tt mnstatus}.NMIE, which
-re-enables interrupts.
-
-If the hart encounters an exception while the {\tt mnstatus}.NMIE bit is
-clear, the actions taken are the same as if the exception had occurred while
-{\tt mnstatus}.NMIE were set, except that the program counter is set to the
-RNMI exception trap handler address (rather than the address specified by
-{\tt mtvec}).
-
-\begin{commentary}
-The Smrnmi extension does not change the behavior of the MRET and SRET
-instructions.
-In particular, MRET and SRET are unaffected by the {\tt mnstatus}.NMIE bit,
-and their execution does not alter the {\tt mnstatus}.NMIE bit.
-\end{commentary}
diff --git a/src/latex/supervisor.tex b/src/latex/supervisor.tex
deleted file mode 100644
index 0f6ae58..0000000
--- a/src/latex/supervisor.tex
+++ /dev/null
@@ -1,2705 +0,0 @@
-\chapter{Supervisor-Level ISA, Version 1.12}
-\label{supervisor}
-
-This chapter describes the RISC-V supervisor-level architecture, which
-contains a common core that is used with various supervisor-level
-address translation and protection schemes.
-
-\begin{commentary}
-Supervisor mode is deliberately restricted in terms of interactions
-with underlying physical hardware, such as physical memory and device
-interrupts, to support clean virtualization.
-In this spirit, certain supervisor-level facilities, including requests for
-timer and interprocessor interrupts, are provided by implementation-specific
-mechanisms. In some systems, a supervisor execution environment (SEE)
-provides these facilities in a manner specified by a supervisor binary
-interface (SBI). Other systems supply these facilities directly, through some
-other implementation-defined mechanism.
-\end{commentary}
-
-\section{Supervisor CSRs}
-
-A number of CSRs are provided for the supervisor.
-
-\begin{commentary}
-The supervisor should only view CSR state that should be visible to a
-supervisor-level operating system. In particular, there is no
-information about the existence (or non-existence) of higher privilege
-levels (machine level or other) visible in the CSRs accessible by the
-supervisor.
-
-Many supervisor CSRs are a subset of the equivalent machine-mode CSR,
-and the machine-mode chapter should be read first to help understand
-the supervisor-level CSR descriptions.
-\end{commentary}
-
-\subsection{Supervisor Status Register ({\tt sstatus})}
-\label{sstatus}
-
-
-The {\tt sstatus} register is an SXLEN-bit read/write register
-formatted as shown in Figure~\ref{sstatusreg-rv32} when SXLEN=32 and
-Figure~\ref{sstatusreg} when SXLEN=64. The {\tt sstatus}
-register keeps track of the processor's current operating state.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{cEcccc}
-\\
-\instbit{31} &
-\instbitrange{30}{20} &
-\instbit{19} &
-\instbit{18} &
-\instbit{17} &
- \\
-\hline
-\multicolumn{1}{|c|}{SD} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{MXR} &
-\multicolumn{1}{c|}{SUM} &
-\multicolumn{1}{c|}{\wpri} &
- \\
-\hline
-1 & 11 & 1 & 1 & 1 & \\
-\end{tabular}
-\begin{tabular}{cWWWWccccWcc}
-\\
-&
-\instbitrange{16}{15} &
-\instbitrange{14}{13} &
-\instbitrange{12}{11} &
-\instbitrange{10}{9} &
-\instbit{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbitrange{4}{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
- &
-\multicolumn{1}{|c|}{XS[1:0]} &
-\multicolumn{1}{c|}{FS[1:0]} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{VS[1:0]} &
-\multicolumn{1}{c|}{SPP} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{UBE} &
-\multicolumn{1}{c|}{SPIE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{SIE} &
-\multicolumn{1}{c|}{\wpri} \\
-\hline
- & 2 & 2 & 2 & 2 & 1 & 1 & 1 & 1 & 3 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Supervisor-mode status register ({\tt sstatus}) when SXLEN=32.}
-\label{sstatusreg-rv32}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{cMFScccc}
-\\
-\instbit{63} &
-\instbitrange{62}{34} &
-\instbitrange{33}{32} &
-\instbitrange{31}{20} &
-\instbit{19} &
-\instbit{18} &
-\instbit{17} &
- \\
-\hline
-\multicolumn{1}{|c|}{SD} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{UXL[1:0]} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{MXR} &
-\multicolumn{1}{c|}{SUM} &
-\multicolumn{1}{c|}{\wpri} &
- \\
-\hline
-1 & 29 & 2 & 12 & 1 & 1 & 1 & \\
-\end{tabular}
-\begin{tabular}{cWWWWccccWcc}
-\\
-&
-\instbitrange{16}{15} &
-\instbitrange{14}{13} &
-\instbitrange{12}{11} &
-\instbitrange{10}{9} &
-\instbit{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbitrange{4}{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
- &
-\multicolumn{1}{|c|}{XS[1:0]} &
-\multicolumn{1}{c|}{FS[1:0]} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{VS[1:0]} &
-\multicolumn{1}{c|}{SPP} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{UBE} &
-\multicolumn{1}{c|}{SPIE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{SIE} &
-\multicolumn{1}{c|}{\wpri} \\
-\hline
- & 2 & 2 & 2 & 2 & 1 & 1 & 1 & 1 & 3 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Supervisor-mode status register ({\tt sstatus}) when SXLEN=64.}
-\label{sstatusreg}
-\end{figure*}
-
-The SPP bit indicates the privilege level at which a hart was executing before
-entering supervisor mode. When a trap is taken, SPP is set to 0 if the trap
-originated from user mode, or 1 otherwise. When an SRET instruction
-(see Section~\ref{otherpriv}) is executed to return from the trap handler, the
-privilege level is set to user mode if the SPP bit is 0, or supervisor mode if
-the SPP bit is 1; SPP is then set to 0.
-
-The SIE bit enables or disables all interrupts in supervisor mode.
-When SIE is clear, interrupts are not taken while in supervisor mode.
-When the hart is running in user mode, the value in SIE is ignored, and
-supervisor-level interrupts are enabled. The supervisor can disable
-individual interrupt sources using the {\tt sie} CSR.
-
-The SPIE bit indicates whether supervisor interrupts were enabled prior to
-trapping into supervisor mode. When a trap is taken into supervisor
-mode, SPIE is set to SIE, and SIE is set to 0. When an SRET instruction is
-executed, SIE is set to SPIE, then SPIE is set to 1.
-
-The {\tt sstatus} register is a subset of the {\tt mstatus} register.
-
-\begin{commentary}
-In a straightforward implementation, reading or writing any field in
-{\tt sstatus} is equivalent to reading or writing the homonymous field
-in {\tt mstatus}.
-\end{commentary}
-
-\subsubsection{Base ISA Control in {\tt sstatus} Register}
-
-The UXL field controls the value of XLEN for U-mode, termed {\em UXLEN},
-which may differ from the value of XLEN for S-mode, termed {\em SXLEN}. The
-encoding of UXL is the same as that of the MXL field of {\tt misa}, shown in
-Table~\ref{misabase}.
-
-When SXLEN=32, the UXL field does not exist, and UXLEN=32. When
-SXLEN=64, it is a \warl\ field that encodes the current value of UXLEN.
-In particular, an implementation may make UXL be a read-only field whose
-value always ensures that UXLEN=SXLEN.
-
-If UXLEN~$\ne$~SXLEN, instructions executed in the narrower mode must ignore
-source register operand bits above the configured XLEN, and must sign-extend
-results to fill the widest supported XLEN in the destination register.
-
-If UXLEN~$<$~SXLEN, user-mode instruction-fetch addresses and load and store
-effective addresses are taken modulo $2^{\text{UXLEN}}$. For example, when UXLEN=32
-and SXLEN=64, user-mode memory accesses reference the lowest \wunits{4}{GiB}
-of the address space.
-
-\subsubsection{Memory Privilege in {\tt sstatus} Register}
-\label{sec:sum}
-
-The MXR (Make eXecutable Readable) bit modifies the privilege with which loads
-access virtual memory. When MXR=0, only loads from pages marked readable (R=1
-in Figure~\ref{sv32pte}) will succeed. When MXR=1, loads from pages marked
-either readable or executable (R=1 or X=1) will succeed. MXR has no effect
-when page-based virtual memory is not in effect.
-
-The SUM (permit Supervisor User Memory access) bit modifies the privilege with
-which S-mode loads and stores access virtual memory.
-When SUM=0, S-mode memory accesses to pages that are accessible by U-mode (U=1
-in Figure~\ref{sv32pte}) will fault. When SUM=1, these accesses are permitted.
-SUM has no effect when page-based virtual memory is not in effect, nor when
-executing in U-mode. Note that S-mode can never execute instructions from user
-pages, regardless of the state of SUM.
-
-SUM is read-only 0 if {\tt satp}.MODE is read-only~0.
-
-\begin{commentary}
-The SUM mechanism prevents supervisor software from inadvertently accessing
-user memory. Operating systems can execute the majority of code with SUM clear;
-the few code segments that should access user memory can temporarily set
-SUM.
-
-The SUM mechanism does not avail S-mode software of permission to execute
-instructions in user code pages. Legitimate use cases for execution from
-user memory in supervisor context are rare in general and nonexistent in POSIX
-environments. However, bugs in supervisors that lead to arbitrary code
-execution are much easier to exploit if the supervisor exploit code can be
-stored in a user buffer at a virtual address chosen by an attacker.
-
-Some non-POSIX single address space operating systems do allow certain
-privileged software to partially execute in supervisor mode, while most
-programs run in user mode, all in a shared address space. This use case can
-be realized by mapping the physical code pages at multiple virtual addresses
-with different permissions, possibly with the assistance of the
-instruction page-fault handler to direct supervisor software to use the
-alternate mapping.
-\end{commentary}
-
-\subsubsection{Endianness Control in {\tt sstatus} Register}
-
-The UBE bit is a \warl\ field that controls the endianness of explicit
-memory accesses made from U-mode, which may differ from the endianness of
-memory accesses in S-mode.
-An implementation may make UBE be a read-only field that always specifies
-the same endianness as for S-mode.
-
-UBE controls whether explicit
-load and store memory accesses made from U-mode are little-endian (UBE=0)
-or big-endian (UBE=1).
-
-UBE has no effect on instruction fetches, which are {\em implicit} memory
-accesses that are always little-endian.
-
-For {\em implicit} accesses to supervisor-level memory management data
-structures, such as page tables, S-mode endianness always applies and UBE
-is ignored.
-
-\begin{commentary}
-Standard RISC-V ABIs are expected to be purely little-endian-only or
-big-endian-only, with no accommodation for mixing endianness.
-Nevertheless, endianness control has been defined so as to permit an
-OS of one endianness to execute user-mode programs of the opposite
-endianness.
-\end{commentary}
-
-\subsection{Supervisor Trap Vector Base Address Register ({\tt stvec})}
-
-The {\tt stvec} register is an SXLEN-bit read/write register that holds
-trap vector configuration, consisting of a vector base address (BASE) and a
-vector mode (MODE).
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{J@{}R}
-\instbitrange{SXLEN-1}{2} &
-\instbitrange{1}{0} \\
-\hline
-\multicolumn{1}{|c|}{BASE[SXLEN-1:2] (\warl)} &
-\multicolumn{1}{c|}{MODE (\warl)} \\
-\hline
-SXLEN-2 & 2 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Supervisor trap vector base address register ({\tt stvec}).}
-\label{stvecreg}
-\end{figure*}
-
-The BASE field in {\tt stvec} is a \warl\ field that can hold any valid
-virtual or physical address, subject to the following alignment constraints:
-the address must be 4-byte aligned, and MODE settings other than Direct might
-impose additional alignment constraints on the value in the BASE field.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|r|c|l|}
-\hline
-Value & Name & Description \\
-\hline
-0 & Direct & All exceptions set {\tt pc} to BASE. \\
-1 & Vectored & Asynchronous interrupts set {\tt pc} to BASE+4$\times$cause. \\
-$\ge$2 & --- & {\em Reserved} \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Encoding of {\tt stvec} MODE field.}
-\label{stvec-mode}
-\end{table*}
-
-The encoding of the MODE field is shown in Table~\ref{stvec-mode}. When
-MODE=Direct, all traps into supervisor mode cause the {\tt pc} to be set to the
-address in the BASE field. When MODE=Vectored, all synchronous exceptions
-into supervisor mode cause the {\tt pc} to be set to the address in the BASE
-field, whereas interrupts cause the {\tt pc} to be set to the address in
-the BASE field plus four times the interrupt cause number. For example,
-a supervisor-mode timer interrupt (see Table~\ref{scauses}) causes the {\tt pc}
-to be set to BASE+{\tt 0x14}.
-Setting MODE=Vectored may impose a stricter alignment constraint on BASE.
-
-\subsection{Supervisor Interrupt Registers ({\tt sip} and {\tt sie})}
-
-The {\tt sip} register is an SXLEN-bit read/write register containing
-information on pending interrupts, while {\tt sie} is the corresponding
-SXLEN-bit read/write register containing interrupt enable bits.
-Interrupt cause number \textit{i} (as reported in CSR {\tt scause},
-Section~\ref{sec:scause}) corresponds with bit~\textit{i} in both
-{\tt sip} and {\tt sie}.
-Bits 15:0 are allocated to standard interrupt causes only, while bits 16
-and above are designated for platform or custom use.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{SXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Interrupts (\warl)} \\
-\hline
-SXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Supervisor interrupt-pending register ({\tt sip}).}
-\label{sipreg}
-\end{figure}
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{SXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{Interrupts (\warl)} \\
-\hline
-SXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Supervisor interrupt-enable register ({\tt sie}).}
-\label{siereg}
-\end{figure}
-
-An interrupt~\textit{i} will trap to S-mode if both of the
-following are true:
-(a)~either the current privilege mode is S and the SIE bit in the
-{\tt sstatus} register is set, or the current privilege mode has less
-privilege than S-mode; and
-(b)~bit~\textit{i} is set in both {\tt sip} and {\tt sie}.
-
-These conditions for an interrupt trap to occur must be evaluated in a bounded
-amount of time from when an interrupt becomes, or ceases to be,
-pending in {\tt sip}, and must
-also be evaluated immediately following the execution of an SRET instruction
-or an explicit write to a CSR on which these interrupt trap conditions
-expressly depend (including {\tt sip}, {\tt sie} and {\tt sstatus}).
-
-Interrupts to S-mode take priority over any interrupts to lower privilege
-modes.
-
-Each individual bit in register {\tt sip} may be writable or may be
-read-only.
-When bit~\textit{i} in {\tt sip} is writable, a pending interrupt
-\textit{i} can be cleared by writing 0 to this bit.
-If interrupt \textit{i} can become pending but bit~\textit{i} in
-{\tt sip} is read-only, the implementation must provide some other
-mechanism for clearing the pending interrupt (which may involve a call to
-the execution environment).
-
-A bit in {\tt sie} must be writable if the corresponding interrupt can
-ever become pending.
-Bits of {\tt sie} that are not writable are read-only zero.
-
-The standard portions (bits 15:0) of registers {\tt sip} and {\tt sie}
-are formatted as shown in Figures \ref{sipreg-standard} and
-\ref{siereg-standard} respectively.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{ScFcFcc}
-\instbitrange{15}{10} &
-\instbit{9} &
-\instbitrange{8}{6} &
-\instbit{5} &
-\instbitrange{4}{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{0} &
-\multicolumn{1}{c|}{SEIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{STIP} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{SSIP} &
-\multicolumn{1}{c|}{0} \\
-\hline
-6 & 1 & 3 & 1 & 3 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Standard portion (bits 15:0) of {\tt sip}.}
-\label{sipreg-standard}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{ScFcFcc}
-\instbitrange{15}{10} &
-\instbit{9} &
-\instbitrange{8}{6} &
-\instbit{5} &
-\instbitrange{4}{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{0} &
-\multicolumn{1}{c|}{SEIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{STIE} &
-\multicolumn{1}{c|}{0} &
-\multicolumn{1}{c|}{SSIE} &
-\multicolumn{1}{c|}{0} \\
-\hline
-6 & 1 & 3 & 1 & 3 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Standard portion (bits 15:0) of {\tt sie}.}
-\label{siereg-standard}
-\end{figure*}
-
-Bits {\tt sip}.SEIP and {\tt sie}.SEIE are the interrupt-pending and
-interrupt-enable bits for supervisor-level external interrupts.
-If implemented, SEIP is read-only in {\tt sip}, and is set and cleared by
-the execution environment, typically through a platform-specific
-interrupt controller.
-
-Bits {\tt sip}.STIP and {\tt sie}.STIE are the interrupt-pending and
-interrupt-enable bits for supervisor-level timer interrupts.
-If implemented, STIP is read-only in {\tt sip}, and is set and cleared by
-the execution environment.
-
-Bits {\tt sip}.SSIP and {\tt sie}.SSIE are the interrupt-pending and
-interrupt-enable bits for supervisor-level software interrupts.
-If implemented, SSIP is writable in {\tt sip} and may also be set
-to 1 by a platform-specific interrupt controller.
-
-\begin{commentary}
-Interprocessor interrupts are sent to other harts by implementation-specific
-means, which will ultimately cause the SSIP bit to be set in the recipient
-hart's {\tt sip} register.
-\end{commentary}
-
-Each standard interrupt type (SEI, STI, or SSI) may not be implemented,
-in which case the corresponding interrupt-pending and interrupt-enable
-bits are read-only zeros.
-All bits in {\tt sip} and {\tt sie} are \warl\ fields.
-The implemented interrupts may be found by writing one to every bit
-location in {\tt sie}, then reading back to see which bit positions hold
-a one.
-
-\begin{commentary}
-The {\tt sip} and {\tt sie} registers are subsets of the {\tt mip} and {\tt
-mie} registers. Reading any implemented field,
-or writing any writable field, of {\tt sip}/{\tt sie}
-effects a read or write of the homonymous field of {\tt mip}/{\tt mie}.
-
-Bits 3, 7, and 11 of {\tt sip} and {\tt sie} correspond to the machine-mode
-software, timer, and external interrupts, respectively. Since most platforms
-will choose not to make these interrupts delegatable from M-mode to S-mode,
-they are shown as 0 in Figures~\ref{sipreg-standard} and
-\ref{siereg-standard}.
-\end{commentary}
-
-Multiple simultaneous
-interrupts destined for supervisor mode are handled in the following
-decreasing priority order: SEI, SSI, STI.
-
-\subsection{Supervisor Timers and Performance Counters}
-
-Supervisor software uses the same hardware performance monitoring facility
-as user-mode software, including the {\tt time}, {\tt cycle}, and {\tt instret}
-CSRs. The implementation should provide a mechanism to modify the
-counter values.
-
-The implementation must provide a facility for scheduling timer interrupts in
-terms of the real-time counter, {\tt time}.
-
-\subsection{Counter-Enable Register ({\tt scounteren})}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\setlength{\tabcolsep}{4pt}
-\begin{tabular}{cccMcccccc}
-\instbit{31} &
-\instbit{30} &
-\instbit{29} &
-\instbitrange{28}{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{HPM31} &
-\multicolumn{1}{c|}{HPM30} &
-\multicolumn{1}{c|}{HPM29} &
-\multicolumn{1}{c|}{...} &
-\multicolumn{1}{c|}{HPM5} &
-\multicolumn{1}{c|}{HPM4} &
-\multicolumn{1}{c|}{HPM3} &
-\multicolumn{1}{c|}{IR} &
-\multicolumn{1}{c|}{TM} &
-\multicolumn{1}{c|}{CY} \\
-\hline
-1 & 1 & 1 & 23 & 1 & 1 & 1 & 1 & 1 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Counter-enable register ({\tt scounteren}).}
-\label{scounteren}
-\end{figure*}
-
-The counter-enable register {\tt scounteren} is a 32-bit register that
-controls the availability of the hardware performance monitoring counters to
-U-mode.
-
-When the CY, TM, IR, or HPM{\em n} bit in the {\tt scounteren} register is
-clear, attempts to read the {\tt cycle}, {\tt time}, {\tt instret}, or
-{\tt hpmcounter{\em n}} register while executing in U-mode
-will cause an illegal instruction exception. When one of these bits is set,
-access to the corresponding register is permitted.
-
-{\tt scounteren} must be implemented. However, any of the bits may be
-read-only zero, indicating reads to the corresponding counter will
-cause an exception when executing in U-mode.
-Hence, they are effectively \warl\ fields.
-
-\begin{commentary}
-The setting of a bit in {\tt mcounteren} does not affect whether the
-corresponding bit in {\tt scounteren} is writable.
-However, U-mode may only access a counter if the corresponding bits in {\tt
-scounteren} and {\tt mcounteren} are both set.
-\end{commentary}
-
-\subsection{Supervisor Scratch Register ({\tt sscratch})}
-
-The {\tt sscratch} register is an SXLEN-bit read/write register,
-dedicated for use by the supervisor. Typically, {\tt sscratch} is
-used to hold a pointer to the hart-local supervisor context while the
-hart is executing user code. At the beginning of a trap handler, {\tt
- sscratch} is swapped with a user register to provide an initial
-working register.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{SXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt sscratch} \\
-\hline
-SXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Supervisor Scratch Register.}
-\label{kregs}
-\end{figure}
-
-\subsection{Supervisor Exception Program Counter ({\tt sepc})}
-
-{\tt sepc} is an SXLEN-bit read/write register formatted as shown in
-Figure~\ref{epcreg}. The low bit of {\tt sepc} ({\tt sepc[0]}) is
-always zero. On implementations that support only IALIGN=32, the two low bits
-({\tt sepc[1:0]}) are always zero.
-
-If an implementation allows IALIGN to be either 16 or 32 (by
-changing CSR {\tt misa}, for example), then, whenever IALIGN=32, bit
-{\tt sepc[1]} is masked on reads so that it appears to be 0. This
-masking occurs also for the implicit read by the SRET instruction.
-Though masked, {\tt sepc[1]} remains writable when IALIGN=32.
-
-{\tt sepc} is a \warl\ register that must be able to hold all valid
-virtual addresses. It need not be capable of holding all possible invalid
-addresses.
-Prior to writing {\tt sepc}, implementations may convert an invalid address
-into some other invalid address that {\tt sepc} is capable of holding.
-
-When a trap is taken into S-mode, {\tt sepc} is written with the
-virtual address of the instruction that was interrupted or that
-encountered the exception. Otherwise, {\tt sepc} is never written by
-the implementation, though it may be explicitly written by software.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{SXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt sepc} \\
-\hline
-SXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Supervisor exception program counter register.}
-\label{epcreg}
-\end{figure}
-
-\subsection{Supervisor Cause Register ({\tt scause})}
-\label{sec:scause}
-
-The {\tt scause} register is an SXLEN-bit read-write register formatted as
-shown in Figure~\ref{scausereg}. When a trap is taken into S-mode, {\tt
-scause} is written with a code indicating the event that caused the trap.
-Otherwise, {\tt scause} is never written by the implementation, though it may be
-explicitly written by software.
-
-The Interrupt bit in the {\tt scause} register is set if the
-trap was caused by an interrupt. The Exception Code field
-contains a code identifying the last exception or interrupt. Table~\ref{scauses}
-lists the possible exception codes for the current supervisor ISAs.
-The Exception Code is a \wlrl\ field. It is required to hold
-the values 0--31 (i.e., bits 4--0 must be implemented), but otherwise
-it is only guaranteed to hold supported exception codes.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{c@{}U}
-\instbit{SXLEN-1} &
-\instbitrange{SXLEN-2}{0} \\
-\hline
-\multicolumn{1}{|c|}{Interrupt} &
-\multicolumn{1}{c|}{Exception Code (\wlrl)} \\
-\hline
-1 & SXLEN-1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Supervisor Cause register {\tt scause}.}
-\label{scausereg}
-\end{figure*}
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|r|r|l|}
-
- \hline
- Interrupt & Exception Code & Description \\
- \hline
- 1 & 0 & {\em Reserved} \\
- 1 & 1 & Supervisor software interrupt \\
- 1 & 2--4 & {\em Reserved} \\
- 1 & 5 & Supervisor timer interrupt \\
- 1 & 6--8 & {\em Reserved} \\
- 1 & 9 & Supervisor external interrupt \\
- 1 & 10--15 & {\em Reserved} \\
- 1 & $\ge$16 & {\em Designated for platform use} \\ \hline
- 0 & 0 & Instruction address misaligned \\
- 0 & 1 & Instruction access fault \\
- 0 & 2 & Illegal instruction \\
- 0 & 3 & Breakpoint \\
- 0 & 4 & Load address misaligned \\
- 0 & 5 & Load access fault \\
- 0 & 6 & Store/AMO address misaligned \\
- 0 & 7 & Store/AMO access fault \\
- 0 & 8 & Environment call from U-mode \\
- 0 & 9 & Environment call from S-mode \\
- 0 & 10--11 & {\em Reserved} \\
- 0 & 12 & Instruction page fault \\
- 0 & 13 & Load page fault \\
- 0 & 14 & {\em Reserved} \\
- 0 & 15 & Store/AMO page fault \\
- 0 & 16--23 & {\em Reserved} \\
- 0 & 24--31 & {\em Designated for custom use} \\
- 0 & 32--47 & {\em Reserved} \\
- 0 & 48--63 & {\em Designated for custom use} \\
- 0 & $\ge$64 & {\em Reserved} \\
- \hline
-\end{tabular}
-\end{center}
-\caption{Supervisor cause register ({\tt scause}) values after trap.
-Synchronous exception priorities are given by Table~\ref{exception-priority}.}
-\label{scauses}
-\end{table*}
-
-\subsection{Supervisor Trap Value ({\tt stval}) Register}
-
-The {\tt stval} register is an SXLEN-bit read-write register formatted as shown
-in Figure~\ref{stvalreg}. When a trap is taken into S-mode, {\tt stval} is
-written with exception-specific information to assist software in handling the
-trap. Otherwise, {\tt stval} is never written by the implementation, though
-it may be explicitly written by software. The hardware platform will specify
-which exceptions must set {\tt stval} informatively and which may
-unconditionally set it to zero.
-
-
-If {\tt stval} is written with a nonzero value when a breakpoint,
-address-misaligned, access-fault, or page-fault exception occurs on an
-instruction fetch, load, or store, then {\tt stval} will contain the faulting
-virtual address.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}J}
-\instbitrange{SXLEN-1}{0} \\
-\hline
-\multicolumn{1}{|c|}{\tt stval} \\
-\hline
-SXLEN \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Supervisor Trap Value register.}
-\label{stvalreg}
-\end{figure}
-
-If {\tt stval} is written with a nonzero value when a misaligned load or store
-causes an access-fault or page-fault exception, then {\tt stval} will contain
-the virtual address of the portion of the access that caused the fault.
-
-If {\tt stval} is written with a nonzero value when an instruction access-fault
-or page-fault exception occurs on a system with variable-length instructions,
-then {\tt stval} will contain the virtual address of the portion of the
-instruction that caused the fault, while {\tt sepc} will point to the beginning
-of the instruction.
-
-The {\tt stval} register can optionally also be used to return the faulting
-instruction bits on an illegal-instruction exception ({\tt sepc} points to the
-faulting instruction in memory).
-If {\tt stval} is written with a nonzero value when an illegal-instruction
-exception occurs, then {\tt stval} will contain the shortest of:
-\begin{compactitem}
-\item the actual faulting instruction
-\item the first ILEN bits of the faulting instruction
-\item the first SXLEN bits of the faulting instruction
-\end{compactitem}
-The value loaded into {\tt stval} on an illegal-instruction exception is
-right-justified and all unused upper bits are cleared to zero.
-
-For other traps, {\tt stval} is set to zero, but a future standard may
-redefine {\tt stval}'s setting for other traps.
-
-{\tt stval} is a \warl\ register that must be able to hold all valid
-virtual addresses and the value 0. It need not be capable of holding all
-possible invalid addresses.
-Prior to writing {\tt stval}, implementations may convert an invalid address
-into some other invalid address that {\tt stval} is capable of holding.
-If the feature to return the faulting instruction bits is implemented, {\tt
-stval} must also be able to hold all values less than $2^N$, where $N$ is the
-smaller of SXLEN and ILEN.
-
-\subsection{Supervisor Environment Configuration Register ({\tt senvcfg})}
-
-The {\tt senvcfg} CSR is an SXLEN-bit read/write register,
-formatted as shown in Figure~\ref{fig:senvcfg},
-that controls certain characteristics of the U-mode execution environment.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}Kcc@{}W@{}Wc}
-\instbitrange{SXLEN-1}{8} &
-\instbit{7} &
-\instbit{6} &
-\instbitrange{5}{4} &
-\instbitrange{3}{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{\wpri} &
-\multicolumn{1}{c|}{CBZE} &
-\multicolumn{1}{c|}{CBCFE} &
-\multicolumn{1}{c|}{CBIE} &
-\multicolumn{1}{c|}{\wpri} &
-\multicolumn{1}{c|}{FIOM} \\
-\hline
-SXLEN-8 & 1 & 1 & 2 & 3 & 1 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Supervisor environment configuration register ({\tt senvcfg}).}
-\label{fig:senvcfg}
-\end{figure}
-
-If bit FIOM (Fence of I/O implies Memory) is set to one in {\tt senvcfg},
-FENCE instructions executed in U-mode are modified so
-the requirement to order accesses to device I/O implies also the requirement
-to order main memory accesses.
-Table~\ref{tab:senvcfg-FIOM} details the modified interpretation of
-FENCE instruction bits PI, PO, SI, and SO in U-mode when FIOM=1.
-
-Similarly, for U-mode when FIOM=1,
-if an atomic instruction that accesses a region ordered as device I/O
-has its {\em aq} and/or {\em rl} bit set, then that instruction is ordered
-as though it accesses both device I/O and memory.
-
-If {\tt satp}.MODE is read-only zero (always Bare), the implementation may make FIOM read-only zero.
-
-\begin{table}[h!]
-\begin{center}
-\begin{tabular}{|c|l|}
-\hline
-Instruction bit & Meaning when set \\
-\hline
-PI & Predecessor device input and memory reads (PR implied) \\
-PO & Predecessor device output and memory writes (PW implied) \\
-\hline
-SI & Successor device input and memory reads (SR implied) \\
-SO & Successor device output and memory writes (SW implied) \\
-\hline
-\end{tabular}
-\end{center}
-\vspace{-0.1in}
-\caption{%
-Modified interpretation of FENCE predecessor and successor sets in U-mode when FIOM=1.}
-\label{tab:senvcfg-FIOM}
-\end{table}
-
-\begin{commentary}
-Bit FIOM exists for a specific circumstance when an I/O device is
-being emulated for U-mode and both of the following are true:
-(a)~the emulated device has a memory buffer that should be I/O space
-but is actually mapped to main memory via address translation, and
-(b)~multiple physical harts are involved in accessing this emulated
-device from U-mode.
-
-A hypervisor running in S-mode without the benefit of the hypervisor
-extension of Chapter~\ref{hypervisor} may need to emulate a device for
-U-mode if paravirtualization cannot be employed.
-If the same hypervisor provides a virtual machine (VM) with multiple
-virtual harts, mapped one-to-one to real harts, then multiple harts may
-concurrently access the emulated device, perhaps because:
-(a)~the guest OS within the VM assigns device interrupt handling to one
-hart while the device is also accessed by a different hart outside of
-an interrupt handler, or
-(b)~control of the device (or partial control) is being migrated
-from one hart to another, such as for interrupt load balancing within
-the VM.
-For such cases, guest software within the VM is expected to properly
-coordinate access to the (emulated) device across multiple harts using
-mutex locks and/or interprocessor interrupts as usual, which in part
-entails executing I/O fences.
-But those I/O fences may not be sufficient if some of the device
-``I/O'' is actually main memory, unknown to the guest.
-Setting FIOM=1 modifies those fences (and all other I/O fences executed
-in U-mode) to include main memory, too.
-
-Software can always avoid the need to set FIOM by never using main
-memory to emulate a device memory buffer that should be I/O space.
-However, this choice usually requires trapping all U-mode accesses
-to the emulated buffer, which might have a noticeable impact on
-performance.
-The alternative offered by FIOM is sufficiently inexpensive to implement that
-we consider it worth supporting even if only rarely enabled.
-\end{commentary}
-
-
-The definition of the CBZE field will be furnished by the
-forthcoming Zicboz extension.
-Its allocation within {\tt senvcfg} may change prior to the ratification
-of that extension.
-
-The definitions of the CBCFE and CBIE fields will be furnished by the
-forthcoming Zicbom extension.
-Their allocations within {\tt senvcfg} may change prior to the ratification
-of that extension.
-
-\subsection{Supervisor Address Translation and Protection ({\tt satp}) Register}
-\label{sec:satp}
-
-The {\tt satp} register is an SXLEN-bit read/write register, formatted as shown
-in Figure~\ref{rv32satp} for SXLEN=32 and Figure~\ref{rv64satp} for SXLEN=64, which
-controls supervisor-mode address translation and protection.
-This register holds the physical page number (PPN) of the root page
-table, i.e., its supervisor physical address divided by \wunits{4}{KiB};
-an address space identifier (ASID), which facilitates address-translation
-fences on a per-address-space basis; and the MODE field, which selects the
-current address-translation scheme. Further details on the access to this
-register are described in Section~\ref{virt-control}.
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{c@{}E@{}K}
-\instbit{31} &
-\instbitrange{30}{22} &
-\instbitrange{21}{0} \\
-\hline
-\multicolumn{1}{|c|}{{\tt MODE} (\warl)} &
-\multicolumn{1}{c|}{{\tt ASID} (\warl)} &
-\multicolumn{1}{c|}{{\tt PPN} (\warl)} \\
-\hline
-1 & 9 & 22 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{%
-Supervisor address translation and protection register {\tt satp}
-when SXLEN=32.%
-}
-\label{rv32satp}
-\end{figure}
-
-\begin{commentary}
-Storing a PPN in {\tt satp}, rather than a physical address, supports
-a physical address space larger than \wunits{4}{GiB} for RV32.
-
-The {\tt satp}.PPN field might not be capable of holding all physical page
-numbers.
-Some platform standards might place constraints on the values {\tt satp}.PPN
-may assume, e.g., by requiring that all physical page numbers corresponding to
-main memory be representable.
-\end{commentary}
-
-\begin{figure}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}S@{}T@{}U}
-\instbitrange{63}{60} &
-\instbitrange{59}{44} &
-\instbitrange{43}{0} \\
-\hline
-\multicolumn{1}{|c|}{{\tt MODE} (\warl)} &
-\multicolumn{1}{c|}{{\tt ASID} (\warl)} &
-\multicolumn{1}{c|}{{\tt PPN} (\warl)} \\
-\hline
-4 & 16 & 44 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{%
-Supervisor address translation and protection register {\tt satp}
-when SXLEN=64, for MODE values Bare, Sv39, Sv48, and Sv57.%
-}
-\label{rv64satp}
-\end{figure}
-
-\begin{commentary}
-We store the ASID and the page table base address in the same CSR to allow the
-pair to be changed atomically on a context switch. Swapping them
-non-atomically could pollute the old virtual address space with new
-translations, or vice-versa. This approach also slightly reduces the cost of
-a context switch.
-\end{commentary}
-
-Table~\ref{tab:satp-mode} shows the encodings of the MODE field when SXLEN=32 and
-SXLEN=64. When MODE=Bare, supervisor virtual addresses are equal to
-supervisor physical addresses, and there is no additional memory protection
-beyond the physical memory protection scheme described in
-Section~\ref{sec:pmp}.
-To select MODE=Bare, software must write zero to the remaining fields of
-{\tt satp} (bits 30--0 when SXLEN=32, or bits 59--0 when SXLEN=64).
-Attempting to select MODE=Bare with a nonzero pattern in the remaining fields
-has an \unspecified\ effect on the value that the remaining fields assume
-and an \unspecified\ effect on address translation and protection behavior.
-
-When SXLEN=32, the {\tt satp} encodings corresponding to MODE=Bare and ASID[8:7]=3 are designated
-for custom use, whereas the encodings corresponding to MODE=Bare and ASID[8:7]$\ne$3 are
-reserved for future standard use.
-When SXLEN=64, all {\tt satp} encodings corresponding to MODE=Bare are reserved for future
-standard use.
-
-\begin{commentary}
-Version 1.11 of this standard stated that the remaining fields in {\tt satp}
-had no effect when MODE=Bare.
-Making these fields reserved facilitates future definition of
-additional translation and protection modes, particularly in RV32, for which
-all patterns of the existing MODE field have already been allocated.
-\end{commentary}
-
-When SXLEN=32, the only other valid setting for MODE is Sv32, a paged
-virtual-memory scheme described in Section~\ref{sec:sv32}.
-
-When SXLEN=64, three paged virtual-memory schemes are defined: Sv39, Sv48, and Sv57,
-described in Sections~\ref{sec:sv39}, \ref{sec:sv48}, and \ref{sec:sv57}, respectively.
-One additional scheme, Sv64, will be defined in a later version
-of this specification. The remaining MODE settings are reserved
-for future use and may define different interpretations of the other fields in
-{\tt satp}.
-
-Implementations are not required to support all MODE settings,
-and if {\tt satp} is written with an unsupported MODE, the entire write has
-no effect; no fields in {\tt satp} are modified.
-
-\begin{table}[h]
-\begin{center}
-\begin{tabular}{|c|c|l|}
-\hline
-\multicolumn{3}{|c|}{SXLEN=32} \\
-\hline
-Value & Name & Description \\
-\hline
-0 & Bare & No translation or protection. \\
-1 & Sv32 & Page-based 32-bit virtual addressing (see Section~\ref{sec:sv32}). \\
-\hline \hline
-\multicolumn{3}{|c|}{SXLEN=64} \\
-\hline
-Value & Name & Description \\
-\hline
-0 & Bare & No translation or protection. \\
-1--7 & --- & {\em Reserved for standard use} \\
-8 & Sv39 & Page-based 39-bit virtual addressing (see Section~\ref{sec:sv39}). \\
-9 & Sv48 & Page-based 48-bit virtual addressing (see Section~\ref{sec:sv48}). \\
-10 & Sv57 & Page-based 57-bit virtual addressing (see Section~\ref{sec:sv57}). \\
-11 & {\em Sv64} & {\em Reserved for page-based 64-bit virtual addressing.} \\
-12--13 & --- & {\em Reserved for standard use} \\
-14--15 & --- & {\em Designated for custom use} \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Encoding of {\tt satp} MODE field.}
-\label{tab:satp-mode}
-\end{table}
-
-The number of ASID bits is \unspecified\ and may be zero. The
-number of implemented ASID bits, termed {\mbox {\em ASIDLEN}}, may be
-determined by writing one to every bit position in the ASID field, then
-reading back the value in {\tt satp} to see which bit positions in the ASID
-field hold a one. The least-significant bits of ASID are implemented first:
-that is, if ASIDLEN~$>$~0, ASID[ASIDLEN-1:0] is writable. The maximal value
-of ASIDLEN, termed ASIDMAX, is 9 for Sv32 or 16 for Sv39, Sv48, and Sv57.
-
-\begin{commentary}
-For many applications, the choice of page size has a substantial
-performance impact. A large page size increases TLB reach and loosens
-the associativity constraints on virtually indexed, physically tagged
-caches. At the same time, large pages exacerbate internal
-fragmentation, wasting physical memory and possibly cache capacity.
-
-After much deliberation, we have settled on a conventional page size
-of \wunits{4}{KiB} for both RV32 and RV64. We expect this decision to ease the
-porting of low-level runtime software and device drivers. The TLB
-reach problem is ameliorated by transparent superpage support in
-modern operating systems~\cite{transparent-superpages}. Additionally,
-multi-level TLB hierarchies are quite inexpensive relative to the
-multi-level cache hierarchies whose address space they map.
-\end{commentary}
-
-The {\tt satp} register is considered {\em active} when the effective
-privilege mode is S-mode or U-mode.
-Executions of the
-address-translation algorithm may only begin using a given value of {\tt satp}
-when {\tt satp} is active.
-
-\begin{commentary}
-Translations that began while {\tt satp} was active are not required to
-complete or terminate when {\tt satp} is no longer active, unless an
-SFENCE.VMA instruction matching the address and ASID is executed. The
-SFENCE.VMA instruction must be used to ensure that updates to the
-address-translation data structures are observed by subsequent implicit reads
-to those structures by a hart.
-\end{commentary}
-
-Note that writing {\tt satp} does not imply any ordering constraints
-between page-table updates and subsequent address translations, nor does
-it imply any invalidation of address-translation caches.
-If the new address space's page tables have been modified, or if an
-ASID is reused, it may be necessary to execute an SFENCE.VMA instruction
-(see Section~\ref{sec:sfence.vma}) after, or in some cases before,
-writing {\tt satp}.
-
-\begin{commentary}
-Not requiring implementations to flush address-translation caches
-upon {\tt satp} writes reduces the cost of context switches, provided
-that the ASID space is sufficiently large.
-\end{commentary}
-
-\section{Supervisor Instructions}
-
-In addition to the SRET instruction defined in
-Section~\ref{otherpriv}, one new supervisor-level instruction is
-provided.
-
-\subsection{Supervisor Memory-Management Fence Instruction}
-\label{sec:sfence.vma}
-
-\vspace{-0.2in}
-\begin{center}
-\begin{tabular}{O@{}R@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{25} &
-\instbitrange{24}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{funct7} &
-\multicolumn{1}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-7 & 5 & 5 & 3 & 5 & 7 \\
-SFENCE.VMA & asid & vaddr & PRIV & 0 & SYSTEM \\
-\end{tabular}
-\end{center}
-
-The supervisor memory-management fence instruction SFENCE.VMA is used to
-synchronize updates to in-memory memory-management data structures with
-current execution. Instruction execution causes implicit reads and writes to
-these data structures; however, these implicit references are ordinarily not
-ordered with respect to explicit loads and stores. Executing
-an SFENCE.VMA instruction guarantees that any previous stores already visible
-to the current RISC-V hart are ordered before certain implicit references by
-subsequent instructions in that hart to the memory-management data structures.
-The specific set of operations ordered by SFENCE.VMA is
-determined by {\em rs1} and {\em rs2}, as described below.
-SFENCE.VMA is also used to invalidate entries in the
-address-translation cache associated with a hart (see
-Section~\ref{sv32algorithm}).
-Further details on the behavior of this instruction are
-described in Section~\ref{virt-control} and Section~\ref{pmp-vmem}.
-
-\begin{commentary}
-The SFENCE.VMA is used to flush any local hardware caches related to
-address translation. It is specified as a fence rather than a TLB
-flush to provide cleaner semantics with respect to which instructions
-are affected by the flush operation and to support a wider variety of
-dynamic caching structures and memory-management schemes. SFENCE.VMA
-is also used by higher privilege levels to synchronize page table
-writes and the address translation hardware.
-\end{commentary}
-
-SFENCE.VMA orders only the local hart's implicit references to the
-memory-management data structures.
-
-\begin{commentary}
-Consequently, other harts must be notified separately when the
-memory-management data structures have been modified.
-One approach is to use 1)
-a local data fence to ensure local writes are visible globally, then
-2) an interprocessor interrupt to the other thread, then 3) a local
-SFENCE.VMA in the interrupt handler of the remote thread, and finally
-4) a signal back to the originating thread that the operation is complete. This
-is, of course, the RISC-V analog to a TLB shootdown.
-\end{commentary}
-
-For the common case that the translation data structures have only been
-modified for a single address mapping (i.e., one page or superpage), {\em rs1}
-can specify a virtual address within that mapping to effect a translation
-fence for that mapping only. Furthermore, for the common case that the
-translation data structures have only been modified for a single address-space
-identifier, {\em rs2} can specify the address space. The behavior of
-SFENCE.VMA depends on {\em rs1} and {\em rs2} as follows:
-
-\begin{itemize}
-\item If {\em rs1}={\tt x0} and {\em rs2}={\tt x0}, the fence orders all
- reads and writes made to any level of the page tables, for all
- address spaces. The fence also invalidates all address-translation
- cache entries, for all address spaces.
-\item If {\em rs1}={\tt x0} and {\em rs2}$\neq${\tt x0}, the fence orders
- all reads and writes made to any level of the page tables, but only
- for the address space identified by integer register {\em rs2}.
- Accesses to {\em global} mappings (see Section~\ref{sec:translation})
- are not ordered. The fence also invalidates all address-translation
- cache entries matching the address space identified by integer register
- {\em rs2}, except for entries containing global mappings.
-\item If {\em rs1}$\neq${\tt x0} and {\em rs2}={\tt x0}, the fence orders
- only reads and writes made to leaf page table entries corresponding
- to the virtual address in {\em rs1}, for all address spaces.
- The fence also invalidates all address-translation cache entries that
- contain leaf page table entries corresponding to the virtual address
- in {\em rs1}, for all address spaces.
-\item If {\em rs1}$\neq${\tt x0} and {\em rs2}$\neq${\tt x0}, the fence
- orders only reads and writes made to leaf page table entries
- corresponding to the virtual address in {\em rs1}, for the address
- space identified by integer register {\em rs2}.
- Accesses to global mappings are not ordered. The fence also
- invalidates all address-translation cache entries that contain leaf
- page table entries corresponding to the virtual address in {\em rs1}
- and that match the address space identified by integer register {\em
- rs2}, except for entries containing global mappings.
-\end{itemize}
-
-If the value held in {\em rs1} is not a valid virtual address, then the
-SFENCE.VMA instruction has no effect. No exception is raised in this case.
-
-When {\em rs2}$\neq${\tt x0}, bits SXLEN-1:ASIDMAX of the value held in {\em
-rs2} are reserved for future standard use. Until their use is defined by a
-standard extension, they should be zeroed by software and ignored
-by current implementations. Furthermore, if ASIDLEN~$<$~ASIDMAX, the
-implementation shall ignore bits ASIDMAX-1:ASIDLEN of the value held in {\em
-rs2}.
-
-\begin{commentary}
-It is always legal to over-fence, e.g., by fencing only based on a subset
-of the bits in {\em rs1} and/or {\em rs2}, and/or by simply treating all
-SFENCE.VMA instructions as having {\em rs1}={\tt x0} and/or
-{\em rs2}={\tt x0}. For example, simpler implementations can ignore the
-virtual address in {\em rs1} and the ASID value in {\em rs2} and always perform
-a global fence. The choice not to raise an exception when an invalid virtual
-address is held in {\em rs1} facilitates this type of simplification.
-\end{commentary}
-
-An implicit read of the memory-management data structures may return any
-translation for an address that was valid at
-any time since the most recent SFENCE.VMA that subsumes that address. The
-ordering implied by SFENCE.VMA does not place implicit reads and writes to the
-memory-management data structures into the global memory order in a way that
-interacts cleanly with the standard RVWMO ordering rules. In particular, even
-though an SFENCE.VMA orders prior explicit accesses before subsequent implicit
-accesses, and those implicit accesses are ordered before their associated
-explicit accesses, SFENCE.VMA does not necessarily place prior explicit
-accesses before subsequent explicit accesses in the global memory order. These
-implicit loads also need not otherwise obey normal program order semantics with
-respect to prior loads or stores to the same address.
-
-\begin{commentary}
-A consequence of this specification is that an implementation may use any
-translation for an address that was valid at any time since the most recent
-SFENCE.VMA that subsumes that address.
-In particular, if a leaf PTE is modified but a subsuming SFENCE.VMA is not
-executed, either the old translation or the new translation will be used, but
-the choice is unpredictable.
-The behavior is otherwise well-defined.
-
-In a conventional TLB design, it is possible for multiple entries to match a
-single address if, for example, a page is upgraded to a superpage without first
-clearing the original non-leaf PTE's valid bit and executing an SFENCE.VMA with
-{\em rs1}={\tt x0}.
-In this case, a similar remark applies: it is unpredictable whether the old
-non-leaf PTE or the new leaf PTE is used, but the behavior is otherwise well
-defined.
-
-Another consequence of this specification is that it is generally unsafe to
-update a PTE using a set of stores of a width less than the width of the PTE,
-as it is legal for the implementation to read the PTE at any time, including
-when only some of the partial stores have taken effect.
-\end{commentary}
-
-\begin{commentary}
-This specification permits the caching of PTEs whose V (Valid) bit is clear.
-Operating systems must be written to cope with this possibility, but implementers
-are reminded that eagerly caching invalid PTEs will reduce performance by causing
-additional page faults.
-\end{commentary}
-
-Implementations must only perform implicit reads of the translation
-data structures pointed to by the current contents of the {\tt satp}
-register or a subsequent valid (V=1) translation data structure entry,
-and must only raise exceptions for implicit accesses that are
-generated as a result of instruction execution, not those that are
-performed speculatively.
-
-Changes to the {\tt sstatus} fields SUM and MXR take effect immediately,
-without the need to execute an SFENCE.VMA instruction.
-Changing {\tt satp}.MODE from Bare to other modes and vice versa also
-takes effect immediately, without the need to execute an SFENCE.VMA
-instruction.
-Likewise, changes to {\tt satp}.ASID take effect immediately.
-
-\begin{commentary}
-The following common situations typically require executing an
-SFENCE.VMA instruction:
-
-\vspace{-0.1in}
-\begin{itemize}
-
-\item When software recycles an ASID (i.e., reassociates it with a different
-page table), it should {\em first} change {\tt satp} to point to the new page
-table using the recycled ASID, {\em then} execute SFENCE.VMA with {\em
-rs1}={\tt x0} and {\em rs2} set to the recycled ASID. Alternatively, software
-can execute the same SFENCE.VMA instruction while a different ASID is loaded
-into {\tt satp}, provided the next time {\tt satp} is loaded with the recycled
-ASID, it is simultaneously loaded with the new page table.
-
-\item If the implementation does not provide ASIDs, or software chooses to
-always use ASID 0, then after every {\tt satp} write, software should execute
-SFENCE.VMA with {\em rs1}={\tt x0}. In the common case that no global
-translations have been modified, {\em rs2} should be set to a register other than
-{\tt x0} but which contains the value zero, so that global translations are
-not flushed.
-
-\item If software modifies a non-leaf PTE, it should execute SFENCE.VMA with
-{\em rs1}={\tt x0}. If any PTE along the traversal path had its G bit set,
-{\em rs2} must be {\tt x0}; otherwise, {\em rs2} should be set to the ASID for
-which the translation is being modified.
-
-\item If software modifies a leaf PTE, it should execute SFENCE.VMA with {\em
-rs1} set to a virtual address within the page. If any PTE along the traversal
-path had its G bit set, {\em rs2} must be {\tt x0}; otherwise, {\em rs2}
-should be set to the ASID for which the translation is being modified.
-
-\item For the special cases of increasing the permissions on a leaf PTE and
-changing an invalid PTE to a valid leaf, software may choose to execute
-the SFENCE.VMA lazily. After modifying the PTE but before executing
-SFENCE.VMA, either the new or old permissions will be used. In the latter
-case, a page-fault exception might occur, at which point software should
-execute SFENCE.VMA in accordance with the previous bullet point.
-
-\end{itemize}
-\end{commentary}
-
-If a hart employs an address-translation cache, that cache must appear to be
-private to that hart.
-In particular, the meaning of an ASID is local to a hart; software may choose
-to use the same ASID to refer to different address spaces on different harts.
-
-\begin{commentary}
-A future extension could redefine ASIDs to be global across the SEE, enabling
-such options as shared translation caches and hardware support for broadcast
-TLB shootdown.
-However, as OSes have evolved to significantly reduce the scope of TLB
-shootdowns using novel ASID-management techniques, we expect the local-ASID
-scheme to remain attractive for its simplicity and possibly better
-scalability.
-\end{commentary}
-
-For implementations that make {\tt satp}.MODE read-only zero (always Bare), attempts to
-execute an SFENCE.VMA instruction might raise an illegal-instruction
-exception.
-
-\section{Sv32: Page-Based 32-bit Virtual-Memory Systems}
-\label{sec:sv32}
-
-When Sv32 is written to the MODE field in the {\tt satp} register (see
-Section~\ref{sec:satp}), the supervisor operates in a 32-bit paged
-virtual-memory system. In this mode, supervisor and user virtual addresses
-are translated into supervisor physical addresses by traversing a radix-tree
-page table. Sv32 is supported when SXLEN=32 and is designed to include
-mechanisms sufficient for supporting modern Unix-based operating systems.
-
-\begin{commentary}
-The initial RISC-V paged virtual-memory architectures have been
-designed as straightforward implementations to support existing
-operating systems. We have architected page table layouts to support
-a hardware page-table walker. Software TLB refills are a performance
-bottleneck on high-performance systems, and are especially troublesome
-with decoupled specialized coprocessors. An implementation can choose
-to implement software TLB refills using a machine-mode trap handler as
-an extension to M-mode.
-\end{commentary}
-
-\begin{commentary}
-Some ISAs architecturally expose \emph{virtually indexed, physically tagged}
-caches, in that accesses to the same physical address via different virtual
-addresses might not be coherent unless the virtual addresses lie within the
-same cache set.
-Implicitly, this specification does not permit such behavior to be
-architecturally exposed.
-\end{commentary}
-
-\subsection{Addressing and Memory Protection}
-\label{sec:translation}
-
-Sv32 implementations support a 32-bit virtual address space, divided
-into \wunits{4}{KiB} pages. An Sv32 virtual address is partitioned
-into a virtual page number (VPN) and page offset, as shown in
-Figure~\ref{sv32va}. When Sv32 virtual memory mode is selected in the
-MODE field of the {\tt satp} register, supervisor virtual addresses
-are translated into supervisor physical addresses via a two-level page
-table. The 20-bit VPN is translated into a 22-bit physical page
-number (PPN), while the 12-bit page offset is untranslated. The
-resulting supervisor-level physical addresses are then checked using
-any physical memory protection structures (Section~\ref{sec:pmp}),
-before being directly converted to machine-level physical addresses.
-If necessary, supervisor-level physical addresses are zero-extended
-to the number of physical address bits found in the implementation.
-
-\begin{commentary}
-For example, consider an RV32 system supporting 34 bits of physical
-address. When the value of {\tt satp}.MODE is Sv32, a 34-bit physical
-address is produced directly, and therefore no zero-extension is needed.
-When the value of {\tt satp}.MODE is Bare, the 32-bit virtual address is
-translated (unmodified) into a 32-bit physical address, and then that
-physical address is zero-extended into a 34-bit machine-level physical
-address.
-\end{commentary}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}O@{}O@{}E}
-\instbitrange{31}{22} &
-\instbitrange{21}{12} &
-\instbitrange{11}{0} \\
-\hline
-\multicolumn{1}{|c|}{VPN[1]} &
-\multicolumn{1}{c|}{VPN[0]} &
-\multicolumn{1}{c|}{page offset} \\
-\hline
-10 & 10 & 12 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv32 virtual address.}
-\label{sv32va}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}E@{}O@{}E}
-\instbitrange{33}{22} &
-\instbitrange{21}{12} &
-\instbitrange{11}{0} \\
-\hline
-\multicolumn{1}{|c|}{PPN[1]} &
-\multicolumn{1}{c|}{PPN[0]} &
-\multicolumn{1}{c|}{page offset} \\
-\hline
-12 & 10 & 12 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv32 physical address.}
-\label{rv32va}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}E@{}O@{}Fcccccccc}
-\instbitrange{31}{20} &
-\instbitrange{19}{10} &
-\instbitrange{9}{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{PPN[1]} &
-\multicolumn{1}{c|}{PPN[0]} &
-\multicolumn{1}{c|}{RSW} &
-\multicolumn{1}{c|}{D} &
-\multicolumn{1}{c|}{A} &
-\multicolumn{1}{c|}{G} &
-\multicolumn{1}{c|}{U} &
-\multicolumn{1}{c|}{X} &
-\multicolumn{1}{c|}{W} &
-\multicolumn{1}{c|}{R} &
-\multicolumn{1}{c|}{V} \\
-\hline
-12 & 10 & 2 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1\\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv32 page table entry.}
-\label{sv32pte}
-\end{figure*}
-
-Sv32 page tables consist of $2^{10}$ page-table entries (PTEs), each
-of four bytes. A page table is exactly the size of a page and must
-always be aligned to a page boundary. The physical page number of the
-root page table is stored in the {\tt satp} register.
-
-The PTE format for Sv32 is shown in Figure~\ref{sv32pte}. The V bit
-indicates whether the PTE is valid; if it is 0, all other bits in the PTE are
-don't-cares and may be used freely by software. The permission bits, R, W,
-and X, indicate whether the page is readable, writable, and executable,
-respectively. When all three are zero, the PTE is a pointer to the next level
-of the page table; otherwise, it is a leaf PTE. Writable pages must also be
-marked readable; the contrary combinations are reserved for future use.
-Table~\ref{pteperm} summarizes the encoding of the permission bits.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|c|c|c||l|}
-\hline
-X & W & R & Meaning \\
-\hline
-0 & 0 & 0 & Pointer to next level of page table. \\
-0 & 0 & 1 & Read-only page. \\
-0 & 1 & 0 & {\em Reserved for future use.} \\
-0 & 1 & 1 & Read-write page. \\
-1 & 0 & 0 & Execute-only page. \\
-1 & 0 & 1 & Read-execute page. \\
-1 & 1 & 0 & {\em Reserved for future use.} \\
-1 & 1 & 1 & Read-write-execute page. \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Encoding of PTE R/W/X fields.}
-\label{pteperm}
-\end{table*}
-
-Attempting to fetch an instruction from a page that does not have execute
-permissions raises a fetch page-fault exception. Attempting to execute
-a load or load-reserved instruction whose effective address lies within
-a page without read permissions raises a load page-fault exception.
-Attempting to execute a store, store-conditional,
-or AMO instruction whose effective address lies within a page without
-write permissions raises a store page-fault exception.
-\begin{commentary}
-AMOs never raise load page-fault exceptions. Since any unreadable page is
-also unwritable, attempting to perform an AMO on an unreadable page always
-raises a store page-fault exception.
-\end{commentary}
-
-The U bit indicates whether the page is accessible to user mode.
-U-mode software may only access the page when U=1. If the SUM bit
-in the {\tt sstatus} register is
-set, supervisor mode software may also access pages with U=1.
-However, supervisor code normally operates with the SUM bit clear, in
-which case, supervisor code will fault on accesses to user-mode pages.
-Irrespective of SUM, the supervisor may not execute code on pages with U=1.
-
-\begin{commentary}
-An alternative PTE format would support different permissions for supervisor
-and user. We omitted this feature because it would be largely redundant with
-the SUM mechanism (see Section~\ref{sec:sum}) and would require more encoding
-space in the PTE.
-\end{commentary}
-
-The G bit designates a {\em global} mapping. Global mappings are those that
-exist in all address spaces. For non-leaf PTEs, the global setting implies
-that all mappings in the subsequent levels of the page table are global. Note
-that failing to mark a global mapping as global merely reduces performance,
-whereas marking a non-global mapping as global is a software bug that,
-after switching to an address space with a different non-global mapping for
-that address range, can unpredictably result in either mapping being used.
-
-\begin{commentary}
-Global mappings need not be stored redundantly in address-translation caches
-for multiple ASIDs. Additionally, they need not be flushed from local
-address-translation caches when an SFENCE.VMA instruction is executed with
-{\em rs2}$\neq${\tt x0}.
-\end{commentary}
-
-The RSW field is reserved for use by supervisor software; the implementation
-shall ignore this field.
-
-Each leaf PTE contains an accessed (A) and dirty (D) bit. The A bit indicates
-the virtual page has been read, written, or fetched from since the last time
-the A bit was cleared. The D bit indicates the virtual page has been written
-since the last time the D bit was cleared.
-
-Two schemes to manage the A and D bits are permitted:
-\begin{itemize}
-\item When a virtual page is accessed and the A bit is clear, or is
- written and the D bit is clear, a page-fault exception is raised.
-
-\item When a virtual page is accessed and the A bit is clear, or is
- written and the D bit is clear, the implementation sets the
- corresponding bit(s) in the PTE. The PTE update must be atomic with
- respect to other accesses to the PTE, and must atomically check
- that the PTE is valid and grants sufficient permissions. Updates
- of the A bit may be performed as a result of speculation, but updates
- to the D bit must be exact (i.e., not speculative), and observed
- in program order by the local hart. Furthermore, the PTE update
- must appear in the global memory order no later than the explicit
- memory access, or any subsequent explicit memory access to that
- virtual page by the local hart. The ordering on loads and stores
- provided by FENCE instructions and the acquire/release bits on atomic
- instructions also orders the PTE updates associated with those loads
- and stores as observed by remote harts.
-
- The PTE update is not required to be atomic with respect to the explicit
- memory access that caused the update, and the sequence is interruptible.
- However, the hart must not perform the explicit memory access before the
- PTE update is globally visible.
-\end{itemize}
-All harts in a system must employ the same PTE-update scheme as each other.
-
-\begin{commentary}
-Prior versions of this specification required PTE A bit updates to be exact,
-but allowing the A bit to be updated as a result of speculation simplifies
-the implementation of address translation prefetchers. System software
-typically uses the A bit as a page replacement policy hint, but does not
-require exactness for functional correctness. On the other hand, D bit updates
-are still required to be exact and performed in program order, as the D bit
-affects the functional correctness of page eviction.
-
-Implementations are of course still permitted to perform both A and D bit
-updates only in an exact manner.
-
-In both cases, requiring atomicity ensures that the PTE update will not be
-interrupted by other intervening writes to the page table, as such interruptions
-could lead to A/D bits being set on PTEs that have been reused for other
-purposes, on memory that has been reclaimed for other purposes, and so on.
-Simple implementations may instead generate page-fault exceptions.
-
-The A and D bits are never cleared by the implementation. If the
-supervisor software does not rely on accessed and/or dirty bits,
-e.g. if it does not swap memory pages to secondary storage or if the
-pages are being used to map I/O space, it should always set them to 1
-in the PTE to improve performance.
-\end{commentary}
-
-Any level of PTE may be a leaf PTE, so in addition to 4 KiB pages, Sv32
-supports 4 MiB {\em megapages}. A megapage must be virtually and
-physically aligned to a 4 MiB boundary; a page-fault exception is raised
-if the physical address is insufficiently aligned.
-
-For non-leaf PTEs, the D, A, and U bits are reserved for future standard
-use. Until their use is defined by a standard extension, they
-must be cleared by software for forward compatibility.
-
-For implementations with both page-based virtual memory and the ``A'' standard
-extension, the LR/SC reservation set must lie completely within a single
-base physical page (i.e., a naturally aligned \wunits{4}{KiB}
-physical-memory region).
-
-\subsection{Virtual Address Translation Process}
-\label{sv32algorithm}
-
-A virtual address $va$ is translated into a physical address $pa$ as
-follows:
-
-\begin{enumerate}
-
-\item Let $a$ be ${\tt satp}.ppn \times \textrm{PAGESIZE}$, and let $i=\textrm{LEVELS} - 1$. (For Sv32, PAGESIZE=$2^{12}$ and LEVELS=2.)
- The {\tt satp} register must be {\em active}, i.e., the effective privilege
- mode must be S-mode or U-mode.
-
-\item Let $pte$ be the value of the PTE at address
- $a+va.vpn[i]\times \textrm{PTESIZE}$. (For Sv32, PTESIZE=4.)
- If accessing $pte$ violates a PMA or PMP check, raise an
- access-fault exception corresponding to the original access type.
-
-\item If $pte.v=0$, or if $pte.r=0$ and $pte.w=1$, or if any bits or encodings
- that are reserved for future standard use are set within $pte$, stop and
- raise a page-fault exception corresponding to the original access type.
-
-\item Otherwise, the PTE is valid.
- If $pte.r=1$ or $pte.x=1$, go to step 5.
- Otherwise, this PTE is a pointer to the next level of the page table. Let
- $i=i-1$. If $i<0$, stop and raise a page-fault exception
- corresponding to the original access type. Otherwise, let
- $a=pte.ppn \times \textrm{PAGESIZE}$ and go to step 2.
-
-\item A leaf PTE has been found. Determine if the requested memory access is
- allowed by the $pte.r$, $pte.w$, $pte.x$, and $pte.u$ bits, given the
- current privilege mode and the value of the SUM and MXR fields of
- the {\tt mstatus} register. If not, stop and raise a page-fault
- exception corresponding to the original access type.
-
-\item If $i>0$ and $pte.ppn[i-1:0]\neq 0$, this is a misaligned superpage;
- stop and raise a page-fault exception corresponding to the original access type.
-
-\item If $pte.a=0$, or if the original memory access is a store and $pte.d=0$, either
- raise a page-fault exception corresponding to the original access type, or:
- \begin{itemize}
- \item If a store to $pte$ would violate a PMA or PMP check, raise an
- access-fault exception corresponding to the original access type.
- \item Perform the following steps atomically:
- \begin{itemize}
- \item Compare $pte$ to the value of the PTE at address $a+va.vpn[i]\times \textrm{PTESIZE}$.
- \item If the values match, set $pte.a$ to 1 and, if the original memory
- access is a store, also set $pte.d$ to 1.
- \item If the comparison fails, return to step 2.
- \end{itemize}
- \end{itemize}
-
-\item The translation is successful. The translated physical address is
- given as follows:
-\begin{itemize}
-\item $\textit{pa.pgoff} = \textit{va.pgoff}$.
-\item If $i>0$, then this is a superpage translation and $pa.ppn[i-1:0]=va.vpn[i-1:0]$.
-\item $pa.ppn[\textrm{LEVELS} - 1:i] = pte.ppn[\textrm{LEVELS} - 1:i]$.
-\end{itemize}
-
-\end{enumerate}
-
-All implicit accesses to the address-translation data structures in this
-algorithm are performed using width PTESIZE.
-
-\begin{commentary}
-This implies, for example, that an Sv48 implementation may not use two separate
-4B reads to non-atomically access a single 8B PTE, and that A/D bit updates
-performed by the implementation are treated as atomically updating the entire
-PTE, rather than just the A and/or D bit alone (even though the PTE value does
-not otherwise change).
-\end{commentary}
-
-The results of implicit address-translation reads in step 2 may be held in a
-read-only, incoherent {\em address-translation cache} but not shared with other
-harts. The address-translation cache may hold an arbitrary number of entries,
-including an arbitrary number of entries for the same address and ASID.
-Entries in the address-translation cache may then satisfy subsequent step 2
-reads if the ASID associated with the entry matches the ASID loaded in step 1
-or if the entry is associated with a {\em global} mapping. To ensure that
-implicit reads observe writes to the same memory locations, an SFENCE.VMA
-instruction must be executed after the writes to flush the relevant cached
-translations.
-
-The address-translation cache cannot be used in step 7; accessed and
-dirty bits may only be updated in memory directly.
-
-\begin{commentary}
- It is permitted for multiple address-translation cache entries to co-exist
- for the same address. This represents the fact that in a conventional TLB
- hierarchy, it is possible for multiple entries to match a single address if, for
- example, a page is upgraded to a superpage without first clearing the
- original non-leaf PTE's valid bit and executing an SFENCE.VMA with {\em
- rs1}={\tt x0}, or if multiple TLBs exist in parallel at a given level of the
- hierarchy. In this case, just as when an SFENCE.VMA is not executed between
- a write to the memory-management tables and a subsequent implicit read of the
- same address, it is unpredictable whether the old non-leaf PTE or the new leaf
- PTE is used, but the behavior is otherwise well defined.
-\end{commentary}
-
-Implementations may also execute the address-translation algorithm
-speculatively at any time, for any virtual address, as long as {\tt satp} is
-active (as defined in Section~\ref{sec:satp}). Such speculative executions
-have the effect of pre-populating the address-translation cache.
-
-Speculative executions of the address-translation algorithm behave as
-non-speculative executions of the algorithm do, except that they must not set the
-dirty bit for a PTE, they must not trigger an exception, and they must not create
-address-translation cache entries if those entries would have been invalidated
-by any SFENCE.VMA instruction executed by the hart since the speculative
-execution of the algorithm began.
-
-\begin{commentary}
- For instance, it is illegal for both non-speculative and speculative
- executions of the translation algorithm to begin, read the level 2 page table,
- pause while the hart executes an SFENCE.VMA with {\em rs1}={\em rs2}={\tt x0},
- then resume using the now-stale level 2 PTE, as subsequent implicit reads
- could populate the address-translation cache with stale PTEs.
-
- In many implementations, an SFENCE.VMA instruction with {\em rs1}={\tt x0}
- will therefore either terminate all previously-launched speculative
- executions of the address-translation algorithm (for the specified ASID, if
- applicable), or simply wait for them to complete (in which case any
- address-translation cache entries created will be invalidated by the
- SFENCE.VMA as appropriate). Likewise, an SFENCE.VMA instruction with {\em
- rs1}$\neq${\tt x0} generally must either ensure that previously-launched
- speculative executions of the address-translation algorithm (for the specified
- ASID, if applicable) are prevented from creating new address-translation cache
- entries mapping leaf PTEs, or wait for them to complete.
-
- A consequence of implementations being permitted to read the translation data
- structures arbitrarily early and speculatively is that at any time, all
- page table entries reachable by executing the algorithm may be loaded into
- the address-translation cache.
-
- Although it would be uncommon to place page tables in non-idempotent memory,
- there is no explicit prohibition against doing so. Since the algorithm may
- only touch page tables reachable from the root page table indicated in {\tt
- satp}, the range of addresses that an implementation's page table walker will
- touch is fully under supervisor control.
-\end{commentary}
-
-\begin{commentary}
-The algorithm does not admit the possibility of ignoring high-order PPN bits
-for implementations with narrower physical addresses.
-\end{commentary}
-
-\section{Sv39: Page-Based 39-bit Virtual-Memory System}
-\label{sec:sv39}
-
-This section describes a simple paged virtual-memory system
-for SXLEN=64, which supports 39-bit virtual address spaces. The
-design of Sv39 follows the overall scheme of Sv32, and this section
-details only the differences between the schemes.
-
-\begin{commentary}
-We specified multiple virtual memory systems for RV64 to relieve the tension
-between providing a large address space and minimizing address-translation
-cost. For many systems, \wunits{512}{GiB} of virtual-address space is ample,
-and so Sv39 suffices. Sv48 increases the virtual address space to
-\wunits{256}{TiB}, but increases the physical memory
-capacity dedicated to page tables, the latency of page-table traversals, and
-the size of hardware structures that store virtual addresses. Sv57 increases
-the virtual address space, page table capacity requirement, and translation
-latency even further.
-\end{commentary}
-
-\subsection{Addressing and Memory Protection}
-
-Sv39 implementations support a 39-bit virtual address space, divided
-into \wunits{4}{KiB} pages. An Sv39 address is partitioned as
-shown in Figure~\ref{sv39va}.
-Instruction fetch addresses and load and store effective addresses,
-which are 64 bits, must have bits 63--39 all equal to bit 38, or else
-a page-fault exception will occur. The 27-bit VPN is translated into a
-44-bit PPN via a three-level page table, while the 12-bit page offset
-is untranslated.
-
-\begin{commentary}
-When mapping between narrower and wider addresses, RISC-V
-zero-extends a narrower physical address to a wider size. The mapping
-between 64-bit virtual addresses and the 39-bit usable address
-space of Sv39 is not based on zero-extension but instead follows an
-entrenched convention that allows an OS to use one or a few of the
-most-significant bits of a full-size (64-bit) virtual address to
-quickly distinguish user and supervisor address regions.
-\end{commentary}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}O@{}O@{}O@{}O}
-\instbitrange{38}{30} &
-\instbitrange{29}{21} &
-\instbitrange{20}{12} &
-\instbitrange{11}{0} \\
-\hline
-\multicolumn{1}{|c|}{VPN[2]} &
-\multicolumn{1}{c|}{VPN[1]} &
-\multicolumn{1}{c|}{VPN[0]} &
-\multicolumn{1}{c|}{page offset} \\
-\hline
-9 & 9 & 9 & 12 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv39 virtual address.}
-\label{sv39va}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}T@{}O@{}O@{}O}
-\instbitrange{55}{30} &
-\instbitrange{29}{21} &
-\instbitrange{20}{12} &
-\instbitrange{11}{0} \\
-\hline
-\multicolumn{1}{|c|}{PPN[2]} &
-\multicolumn{1}{c|}{PPN[1]} &
-\multicolumn{1}{c|}{PPN[0]} &
-\multicolumn{1}{c|}{page offset} \\
-\hline
-26 & 9 & 9 & 12 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv39 physical address.}
-\label{sv39pa}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{cF@{}Y@{}Y@{}Y@{}Y@{}Fcccccccc}
-\instbit{63} &
-\instbitrange{62}{61} &
-\instbitrange{60}{54} &
-\instbitrange{53}{28} &
-\instbitrange{27}{19} &
-\instbitrange{18}{10} &
-\instbitrange{9}{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{N} &
-\multicolumn{1}{c|}{PBMT} &
-\multicolumn{1}{c|}{\it Reserved} &
-\multicolumn{1}{c|}{PPN[2]} &
-\multicolumn{1}{c|}{PPN[1]} &
-\multicolumn{1}{c|}{PPN[0]} &
-\multicolumn{1}{c|}{RSW} &
-\multicolumn{1}{c|}{D} &
-\multicolumn{1}{c|}{A} &
-\multicolumn{1}{c|}{G} &
-\multicolumn{1}{c|}{U} &
-\multicolumn{1}{c|}{X} &
-\multicolumn{1}{c|}{W} &
-\multicolumn{1}{c|}{R} &
-\multicolumn{1}{c|}{V} \\
-\hline
-1 & 2 & 7 & 26 & 9 & 9 & 2 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1\\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv39 page table entry.}
-\label{sv39pte}
-\end{figure*}
-
-Sv39 page tables contain $2^9$ page table entries (PTEs), eight
-bytes each. A page table is exactly the size of a page and must
-always be aligned to a page boundary. The physical page number of the
-root page table is stored in the {\tt satp} register's PPN field.
-
-The PTE format for Sv39 is shown in Figure~\ref{sv39pte}. Bits 9--0
-have the same meaning as for Sv32.
-Bit 63 is reserved for use by the Svnapot extension in
-Chapter~\ref{svnapot}. If Svnapot is not implemented, bit 63 remains
-reserved and must be zeroed by software for forward compatibility,
-or else a page-fault exception is raised.
-Bits 62--61 are reserved for use by the Svpbmt extension in
-Chapter~\ref{svpbmt}. If Svpbmt is not implemented, bits 62--61 remain
-reserved and must be zeroed by software for forward compatibility,
-or else a page-fault exception is raised.
-Bits 60--54 are reserved
-for future standard use and, until their use is defined by some standard
-extension, must be zeroed by software for forward compatibility.
-If any of these bits are set, a page-fault exception is raised.
-
-\begin{commentary}
-We reserved several PTE bits for a possible extension that improves
-support for sparse address spaces by allowing page-table levels to be
-skipped, reducing memory usage and TLB refill latency. These reserved
-bits may also be used to facilitate research experimentation. The
-cost is reducing the physical address space, but \wunits{64}{PiB} is
-presently ample. When it no longer suffices, the reserved
-bits that remain unallocated could be used to expand the physical
-address space.
-\end{commentary}
-
-Any level of PTE may be a leaf PTE, so in addition to \wunits{4}{KiB}
-pages, Sv39 supports \wunits{2}{MiB} {\em megapages} and
-\wunits{1}{GiB} {\em gigapages}, each of which must be virtually and
-physically aligned to a boundary equal to its size.
-A page-fault exception is raised if the physical address is insufficiently
-aligned.
-
-The algorithm for virtual-to-physical address translation is the same as in
-Section~\ref{sv32algorithm}, except LEVELS equals 3 and PTESIZE equals 8.
-
-\section{Sv48: Page-Based 48-bit Virtual-Memory System}
-\label{sec:sv48}
-
-This section describes a simple paged virtual-memory system
-for SXLEN=64, which supports 48-bit virtual address spaces. Sv48
-is intended for systems for which a 39-bit virtual address space is
-insufficient. It closely follows the design of Sv39, simply adding an
-additional level of page table, and so this section only details the
-differences between the two schemes.
-
-Implementations that support Sv48 must also support Sv39.
-
-\begin{commentary}
-Systems that support Sv48 can also support Sv39 at essentially no cost, and so
-should do so to maintain compatibility with supervisor software that assumes
-Sv39.
-\end{commentary}
-
-\subsection{Addressing and Memory Protection}
-
-Sv48 implementations support a 48-bit virtual address space, divided
-into \wunits{4}{KiB} pages. An Sv48 address is partitioned as
-shown in Figure~\ref{sv48va}.
-Instruction fetch addresses and load and store effective addresses,
-which are 64 bits, must have bits 63--48 all equal to bit 47, or else
-a page-fault exception will occur. The 36-bit VPN is translated into a
-44-bit PPN via a four-level page table, while the 12-bit page offset
-is untranslated.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}O@{}O@{}O@{}O@{}O}
-\instbitrange{47}{39} &
-\instbitrange{38}{30} &
-\instbitrange{29}{21} &
-\instbitrange{20}{12} &
-\instbitrange{11}{0} \\
-\hline
-\multicolumn{1}{|c|}{VPN[3]} &
-\multicolumn{1}{c|}{VPN[2]} &
-\multicolumn{1}{c|}{VPN[1]} &
-\multicolumn{1}{c|}{VPN[0]} &
-\multicolumn{1}{c|}{page offset} \\
-\hline
-9 & 9 & 9 & 9 & 12 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv48 virtual address.}
-\label{sv48va}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}E@{}O@{}O@{}O@{}O}
-\instbitrange{55}{39} &
-\instbitrange{38}{30} &
-\instbitrange{29}{21} &
-\instbitrange{20}{12} &
-\instbitrange{11}{0} \\
-\hline
-\multicolumn{1}{|c|}{PPN[3]} &
-\multicolumn{1}{c|}{PPN[2]} &
-\multicolumn{1}{c|}{PPN[1]} &
-\multicolumn{1}{c|}{PPN[0]} &
-\multicolumn{1}{c|}{page offset} \\
-\hline
-17 & 9 & 9 & 9 & 12 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv48 physical address.}
-\label{sv48pa}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{cF@{}F@{}F@{}F@{}F@{}F@{}Fcccccccc}
-\instbit{63} &
-\instbitrange{62}{61} &
-\instbitrange{60}{54} &
-\instbitrange{53}{37} &
-\instbitrange{36}{28} &
-\instbitrange{27}{19} &
-\instbitrange{18}{10} &
-\instbitrange{9}{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{N} &
-\multicolumn{1}{c|}{PBMT} &
-\multicolumn{1}{c|}{\it Reserved} &
-\multicolumn{1}{c|}{PPN[3]} &
-\multicolumn{1}{c|}{PPN[2]} &
-\multicolumn{1}{c|}{PPN[1]} &
-\multicolumn{1}{c|}{PPN[0]} &
-\multicolumn{1}{c|}{RSW} &
-\multicolumn{1}{c|}{D} &
-\multicolumn{1}{c|}{A} &
-\multicolumn{1}{c|}{G} &
-\multicolumn{1}{c|}{U} &
-\multicolumn{1}{c|}{X} &
-\multicolumn{1}{c|}{W} &
-\multicolumn{1}{c|}{R} &
-\multicolumn{1}{c|}{V} \\
-\hline
-1 & 2 & 7 & 17 & 9 & 9 & 9 & 2 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1\\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv48 page table entry.}
-\label{sv48pte}
-\end{figure*}
-
-The PTE format for Sv48 is shown in Figure~\ref{sv48pte}. Bits 63--54 and 9--0
-have the same meaning as for Sv39. Any level of PTE may be a leaf
-PTE, so in addition to \wunits{4}{KiB} pages, Sv48 supports
-\wunits{2}{MiB} {\em megapages}, \wunits{1}{GiB} {\em gigapages}, and
-\wunits{512}{GiB} {\em terapages}, each of which must be virtually and
-physically aligned to a boundary equal to its size.
-A page-fault exception is raised if the physical address is insufficiently
-aligned.
-
-The algorithm for virtual-to-physical address translation is the same
-as in Section~\ref{sv32algorithm}, except LEVELS equals 4 and PTESIZE
-equals 8.
-
-\section{Sv57: Page-Based 57-bit Virtual-Memory System}
-\label{sec:sv57}
-
-This section describes a simple paged virtual-memory system designed
-for RV64 systems, which supports 57-bit virtual address spaces. Sv57
-is intended for systems for which a 48-bit virtual address space is
-insufficient. It closely follows the design of Sv48, simply adding an
-additional level of page table, and so this section only details the
-differences between the two schemes.
-
-Implementations that support Sv57 must also support Sv48.
-
-\begin{commentary}
-Systems that support Sv57 can also support Sv48 at essentially no cost, and so
-should do so to maintain compatibility with supervisor software that assumes
-Sv48.
-\end{commentary}
-
-\subsection{Addressing and Memory Protection}
-
-Sv57 implementations support a 57-bit virtual address space, divided
-into \wunits{4}{KiB} pages. An Sv57 address is partitioned as
-shown in Figure~\ref{sv57va}.
-Instruction fetch addresses and load and store effective addresses,
-which are 64 bits, must have bits 63--57 all equal to bit 56, or else
-a page-fault exception will occur. The 45-bit VPN is translated into a
-44-bit PPN via a five-level page table, while the 12-bit page offset
-is untranslated.
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}S@{}S@{}S@{}S@{}S@{}S}
-\instbitrange{56}{48} &
-\instbitrange{47}{39} &
-\instbitrange{38}{30} &
-\instbitrange{29}{21} &
-\instbitrange{20}{12} &
-\instbitrange{11}{0} \\
-\hline
-\multicolumn{1}{|c|}{VPN[4]} &
-\multicolumn{1}{c|}{VPN[3]} &
-\multicolumn{1}{c|}{VPN[2]} &
-\multicolumn{1}{c|}{VPN[1]} &
-\multicolumn{1}{c|}{VPN[0]} &
-\multicolumn{1}{c|}{page offset} \\
-\hline
-9 & 9 & 9 & 9 & 9 & 12 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv57 virtual address.}
-\label{sv57va}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{@{}R@{}S@{}S@{}S@{}S@{}S}
-\instbitrange{55}{48} &
-\instbitrange{47}{39} &
-\instbitrange{38}{30} &
-\instbitrange{29}{21} &
-\instbitrange{20}{12} &
-\instbitrange{11}{0} \\
-\hline
-\multicolumn{1}{|c|}{PPN[4]} &
-\multicolumn{1}{c|}{PPN[3]} &
-\multicolumn{1}{c|}{PPN[2]} &
-\multicolumn{1}{c|}{PPN[1]} &
-\multicolumn{1}{c|}{PPN[0]} &
-\multicolumn{1}{c|}{page offset} \\
-\hline
-8 & 9 & 9 & 9 & 9 & 12 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv57 physical address.}
-\label{sv57pa}
-\end{figure*}
-
-\begin{figure*}[h!]
-{\footnotesize
-\begin{center}
-\begin{tabular}{c@{}F@{}Y@{}T@{}Wcccccccc}
-\instbit{63} &
-\instbitrange{62}{61} &
-\instbitrange{60}{54} &
-\instbitrange{53}{10} &
-\instbitrange{9}{8} &
-\instbit{7} &
-\instbit{6} &
-\instbit{5} &
-\instbit{4} &
-\instbit{3} &
-\instbit{2} &
-\instbit{1} &
-\instbit{0} \\
-\hline
-\multicolumn{1}{|c|}{N} &
-\multicolumn{1}{c|}{PBMT} &
-\multicolumn{1}{c|}{\it Reserved} &
-\multicolumn{1}{c|}{PPN} &
-\multicolumn{1}{c|}{RSW} &
-\multicolumn{1}{c|}{D} &
-\multicolumn{1}{c|}{A} &
-\multicolumn{1}{c|}{G} &
-\multicolumn{1}{c|}{U} &
-\multicolumn{1}{c|}{X} &
-\multicolumn{1}{c|}{W} &
-\multicolumn{1}{c|}{R} &
-\multicolumn{1}{c|}{V} \\
-\hline
-1 & 2 & 7 & 44 & 2 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1\\
-\end{tabular}
-
-\begin{tabular}{@{}F@{}F@{}F@{}F@{}F}
-\instbitrange{53}{46} &
-\instbitrange{45}{37} &
-\instbitrange{36}{28} &
-\instbitrange{27}{19} &
-\instbitrange{18}{10} \\
-\hline
-\multicolumn{1}{|c|}{PPN[4]} &
-\multicolumn{1}{c|}{PPN[3]} &
-\multicolumn{1}{c|}{PPN[2]} &
-\multicolumn{1}{c|}{PPN[1]} &
-\multicolumn{1}{c|}{PPN[0]} \\
-\hline
-8 & 9 & 9 & 9 & 9 \\
-\end{tabular}
-\end{center}
-}
-\vspace{-0.1in}
-\caption{Sv57 page table entry.}
-\label{sv57pte}
-\end{figure*}
-
-The PTE format for Sv57 is shown in Figure~\ref{sv57pte}. Bits 63--54 and 9--0
-have the same meaning as for Sv39. Any level of PTE may be a leaf
-PTE, so in addition to \wunits{4}{KiB} pages, Sv57 supports
-\wunits{2}{MiB} {\em megapages}, \wunits{1}{GiB} {\em gigapages},
-\wunits{512}{GiB} {\em terapages}, and \wunits{256}{TiB} {\em petapages},
-each of which must be virtually and physically aligned to a boundary equal
-to its size. A page-fault exception is raised if the physical address is
-insufficiently aligned.
-
-The algorithm for virtual-to-physical address translation is the same
-as in Section~\ref{sv32algorithm}, except LEVELS equals 5 and PTESIZE
-equals 8.
-
-\chapter{``Svnapot'' Standard Extension for NAPOT Translation Contiguity, Version 1.0}
-\label{svnapot}
-
-In Sv39, Sv48, and Sv57, when a PTE has N=1, the PTE represents a
-translation that is part of a range of contiguous virtual-to-physical
-translations with the same values for PTE bits 5--0. Such ranges must be of a
-naturally aligned power-of-2 (NAPOT) granularity larger than the base page
-size.
-
-The Svnapot extension depends on Sv39.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|c|c||l|c|}
-\hline
-i & $pte.ppn[i]$ & Description & $pte.napot\_bits$ \\
-\hline
-0 & {\tt x~xxxx~xxx1} & {\em Reserved} & $-$ \\
-0 & {\tt x~xxxx~xx1x} & {\em Reserved} & $-$ \\
-0 & {\tt x~xxxx~x1xx} & {\em Reserved} & $-$ \\
-0 & {\tt x~xxxx~1000} & 64 KiB contiguous region & 4 \\
-0 & {\tt x~xxxx~0xxx} & {\em Reserved} & $-$ \\
-$\geq 1$ & {\tt x~xxxx~xxxx} & {\em Reserved} & $-$ \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Page table entry encodings when $pte$.N=1}
-\label{ptenapot}
-\end{table*}
-
-NAPOT PTEs behave identically to non-NAPOT PTEs within the address-translation
-algorithm in Section~\ref{sv32algorithm}, except that:
-\begin{itemize}
- \item If the encoding in $pte$ is valid according to Table~\ref{ptenapot},
- then instead of returning the original value of $pte$, implicit reads of a
- NAPOT PTE return a copy of $pte$ in which $pte.ppn[i][pte.napot\_bits-1:0]$ is
- replaced by $vpn[i][pte.napot\_bits-1:0]$. If the encoding in $pte$ is
- reserved according to Table~\ref{ptenapot}, then a page-fault exception
- must be raised.
- \item Implicit reads of NAPOT page table entries may create address-translation cache
- entries mapping $a + j \times \textrm{PTESIZE}$ to a copy of $pte$
- in which $pte.ppn[i][pte.napot\_bits-1:0]$ is replaced by
- $vpn[i][pte.napot\_bits-1:0]$, for any or all $j$ such that
- ${j >> napot\_bits}={vpn[i] >> napot\_bits}$, all for the address space identified
- in {\em satp} as loaded by step 1.
-\end{itemize}
-
-\begin{commentary}
- The motivation for a NAPOT PTE is that it can be cached in a TLB as one or
- more entries representing the contiguous region as if it were a single
- (large) page covered by a single translation. This compaction can help
- relieve TLB pressure in some scenarios. The encoding is designed to fit
- within the pre-existing Sv39, Sv48, and Sv57 PTE formats so as not to disrupt
- existing implementations or designs that choose not to implement the scheme.
- It is also designed so as not to complicate the definition of the
- address-translation algorithm.
-
- The address-translation cache abstraction captures the behavior that would result from the creation
- of a single TLB entry covering the entire NAPOT region. It is also designed
- to be consistent with implementations that support NAPOT PTEs by splitting
- the NAPOT region into TLB entries covering any smaller power-of-two region
- sizes. For example, a 64~KiB NAPOT PTE might trigger the creation of 16
- standard 4~KiB TLB entries, all with contents generated from the NAPOT PTE
- (even if the PTEs for the other 4~KiB regions have different contents).
-
- In typical usage scenarios, NAPOT PTEs in the same region will have the same
- attributes, same PPNs, and same values for bits 5--0. RSW remains reserved
- for supervisor software control. It is the responsibility of the OS and/or
- hypervisor to configure the page tables in such a way that there are no
- inconsistencies between NAPOT PTEs and other NAPOT or non-NAPOT PTEs that
- overlap the same address range. If an update needs to be made, the OS
- generally should first mark all of the PTEs invalid, then issue SFENCE.VMA
- instruction(s) covering all 4~KiB regions within the range (either via a
- single SFENCE.VMA with {\em rs1}={\tt x0}, or with multiple SFENCE.VMA
- instructions with {\em rs1}$\neq${\tt x0}), then update the PTE(s), as
- described in Section~\ref{sec:sfence.vma}, unless any inconsistencies are
- known to be benign. If any inconsistencies do exist, then the effect is the
- same as when SFENCE.VMA is used incorrectly: one of the translations will be
- chosen, but the choice is unpredictable.
-
- If an implementation chooses to use a NAPOT PTE (or cached version thereof),
- it might not consult the PTE directly specified by the algorithm in
- Section~\ref{sv32algorithm} at all. Therefore, the D and A bits may not be
- identical across all mappings of the same address range even in typical use
- cases. The operating system must query all NAPOT aliases of a page to
- determine whether that page has been accessed and/or is dirty. If the OS
- manually sets the A and/or D bits for a page, it is recommended that the OS
- also set the A and/or D bits for other NAPOT aliases as appropriate in order
- to avoid unnecessary traps.
-
- Just as with normal PTEs, TLBs are permitted to cache NAPOT PTEs whose V
- (Valid) bit is clear.
-
- Depending on need, the NAPOT scheme may be extended to other intermediate
- page sizes and/or to other levels of the page table in the future. The
- encoding is designed to accommodate other NAPOT sizes should that need
- arise. For example:
-
- \begin{center}\em
- \begin{tabular}{|c|c||l|c|}
- \hline
- i & $pte.ppn[i]$ & Description & $pte.napot\_bits$ \\
- \hline
- 0 & {\tt x~xxxx~xxx1} & 8 KiB contiguous region & 1 \\
- 0 & {\tt x~xxxx~xx10} & 16 KiB contiguous region & 2 \\
- 0 & {\tt x~xxxx~x100} & 32 KiB contiguous region & 3 \\
- 0 & {\tt x~xxxx~1000} & 64 KiB contiguous region & 4 \\
- 0 & {\tt x~xxx1~0000} & 128 KiB contiguous region & 5 \\
- ... & ... & ... & ... \\
- 1 & {\tt x~xxxx~xxx1} & 4 MiB contiguous region & 1 \\
- 1 & {\tt x~xxxx~xx10} & 8 MiB contiguous region & 2 \\
- ... & ... & ... & ... \\
- \hline
- \end{tabular}
- \end{center}
-
- In such a case, an implementation may or may not support all options. The
- discoverability mechanism for this extension would be extended to allow
- system software to determine which sizes are supported.
-
- Other sizes may remain deliberately excluded, so that PPN bits not being
- used to indicate a valid NAPOT region size (e.g., the least-significant bit
- of $pte.ppn[i]$) may be repurposed for other uses in the future.
-
- However, in case finer-grained intermediate page size support proves not to
- be useful, we have chosen to standardize only 64~KiB support as a first step.
-\end{commentary}
-
-\chapter{``Svpbmt'' Standard Extension for Page-Based Memory Types, Version 1.0}
-\label{svpbmt}
-
-In Sv39, Sv48, and Sv57, bits 62--61 of a leaf page table entry indicate the use
-of page-based memory types that override the PMA(s) for the associated memory
-pages. The encoding for the PBMT bits is captured in Table~\ref{pbmt}.
-
-The Svpbmt extension depends on Sv39.
-
-\begin{table*}[h!]
-\begin{center}
-\begin{tabular}{|c|c|l|}
-\hline
-Mode & Value & Requested Memory Attributes \\
-\hline
-PMA & 0 & None \\
-NC & 1 & Non-cacheable, idempotent, weakly-ordered (RVWMO), main memory \\
-IO & 2 & Non-cacheable, non-idempotent, strongly-ordered (I/O ordering), I/O \\
-$-$ & 3 & {\em Reserved for future standard use} \\
-\hline
-\end{tabular}
-\end{center}
-\caption{Encodings for the PBMT field in Sv39, Sv48, and Sv57 PTEs. Attributes
-not mentioned are inherited from the PMA associated with the physical address.}
-\label{pbmt}
-\end{table*}
-
-\begin{commentary}
-Future extensions may provide more and/or finer-grained control over which PMAs
-can be overridden.
-\end{commentary}
-
-For non-leaf PTEs, bits 62--61 are reserved for future standard use. Until
-their use is defined by a standard extension, they must be cleared by software
-for forward compatibility, or else a page-fault exception is raised.
-
-For leaf PTEs, setting bits 62--61 to the value 3 is reserved for future
-standard use.
-Until this value is defined by a standard extension, using this reserved value
-in a leaf PTE raises a page-fault exception.
-
-If the underlying physical memory attribute for a page is vacant, the PBMT settings do not override that.
-
-When PBMT settings override a main memory page into I/O or vice versa, memory
-accesses to such pages obey the memory ordering rules of the final effective
-attribute, as follows.
-
-If the underlying physical memory attribute for a page is I/O, and the page has
-PBMT=NC, then accesses to that page obey RVWMO.
-However, accesses to such pages are
-considered to be {\em both} I/O and main memory accesses for the purposes of FENCE,
-{\em.aq}, and {\em.rl}.
-
-If the underlying physical memory attribute for a page is main memory, and the
-page has PBMT=IO, then accesses to that page obey strong channel 0 I/O ordering
-rules with respect to other accesses to physical main memory and to other
-accesses to pages with PBMT=IO.
-However, accesses to such pages are
-considered to be {\em both} I/O and main memory accesses for the purposes of FENCE,
-{\em.aq}, and {\em.rl}.
-
-\begin{commentary}
-A device driver written to rely on I/O strong ordering rules will not
-operate correctly if the address range is mapped with PBMT=NC.
-As such, this configuration is discouraged.
-
-It will often still be useful to map physical I/O regions using PBMT=NC so that
-write combining and speculative accesses can be performed. Such optimizations
-will likely improve performance when applied with adequate care.
-\end{commentary}
-
-When Svpbmt is used with non-zero PBMT encodings,
-it is possible for multiple virtual aliases of the same
-physical page to exist simultaneously with different memory attributes. It is
-also possible for a U-mode or S-mode mapping through a PTE with Svpbmt enabled
-to observe different memory attributes for a given region of physical memory
-than a concurrent access to the same page performed by M-mode or when
-MODE=Bare. In such cases, the behaviors dictated by the attributes (including
-coherence, which is otherwise unaffected) may be violated.
-
-Accessing the same location using different attributes that are both non-cacheable
-(e.g., NC and IO) does not cause loss of coherence, but might result in weaker
-memory ordering than the stricter attribute ordinarily guarantees.
-Executing a {\tt fence iorw, iorw} instruction between such accesses suffices
-to prevent loss of memory ordering.
-
-Accessing the same location using different cacheability attributes may cause loss
-of coherence.
-Executing the following sequence between such accesses prevents both loss of
-coherence and loss of memory ordering:
-{\tt fence iorw, iorw}, followed by {\tt cbo.flush} to an address of
-that location, followed by a {\tt fence iorw, iorw}.
-
-\begin{commentary}
-It follows that, if the same location might later be referenced using the
-original attributes, then this sequence must be repeated beforehand.
-\end{commentary}
-
-\begin{commentary}
-In certain cases, a weaker sequence might suffice to prevent loss of
-coherence.
-These situations will be detailed following the forthcoming formalization of
-the interaction of the RVWMO memory model with the instructions in the Zicbom
-extension.
-\end{commentary}
-
-When two-stage address translation is enabled within the H extension, the
-page-based memory types are also applied in two stages. First, if
-{\tt hgatp}.MODE is not equal to zero, non-zero G-stage PTE PBMT bits override
-the attributes in the PMA to produce an intermediate set of attributes.
-Otherwise, the PMAs serve as the intermediate attributes. Second, if
-{\tt vsatp}.MODE is not equal to zero, non-zero VS-stage PTE PBMT bits override
-the intermediate attributes to produce the final set of attributes used by
-accesses to the page in question. Otherwise, the intermediate attributes are
-used as the final set of attributes.
-
-\chapter{``Svinval'' Standard Extension for Fine-Grained Address-Translation Cache Invalidation, Version 1.0}
-\label{svinval}
-
-The Svinval extension splits SFENCE.VMA, HFENCE.VVMA, and HFENCE.GVMA
-instructions into finer-grained invalidation and ordering operations that can
-be more efficiently batched or pipelined on certain classes of high-performance
-implementation.
-
-\vspace{-0.2in}
-\begin{center}
-\begin{tabular}{O@{}R@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{25} &
-\instbitrange{24}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{funct7} &
-\multicolumn{1}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-7 & 5 & 5 & 3 & 5 & 7 \\
-SINVAL.VMA & asid & vaddr & PRIV & 0 & SYSTEM \\
-\end{tabular}
-\end{center}
-
-The SINVAL.VMA instruction invalidates any address-translation cache entries
-that an SFENCE.VMA instruction with the same values of {\em rs1} and {\em rs2}
-would invalidate. However, unlike SFENCE.VMA, SINVAL.VMA instructions are only
-ordered with respect to SFENCE.VMA, SFENCE.W.INVAL, and SFENCE.INVAL.IR
-instructions as defined below.
-
-\vspace{-0.2in}
-\begin{center}
-\begin{tabular}{O@{}R@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{25} &
-\instbitrange{24}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{funct7} &
-\multicolumn{1}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-7 & 5 & 5 & 3 & 5 & 7 \\
-SFENCE.W.INVAL & 0 & 0 & PRIV & 0 & SYSTEM \\
-\end{tabular}
-\end{center}
-
-\vspace{-0.2in}
-\begin{center}
-\begin{tabular}{O@{}R@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{25} &
-\instbitrange{24}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{funct7} &
-\multicolumn{1}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-7 & 5 & 5 & 3 & 5 & 7 \\
-SFENCE.INVAL.IR & 1 & 0 & PRIV & 0 & SYSTEM \\
-\end{tabular}
-\end{center}
-
-The SFENCE.W.INVAL instruction guarantees that any previous stores already
-visible to the current RISC-V hart are ordered before subsequent SINVAL.VMA
-instructions executed by the same hart. The SFENCE.INVAL.IR instruction
-guarantees that any previous SINVAL.VMA instructions executed by the current hart
-are ordered before subsequent implicit references by that hart to the
-memory-management data structures.
-
-When executed in order (but not necessarily consecutively) by a single hart, the
-sequence SFENCE.W.INVAL, SINVAL.VMA, and SFENCE.INVAL.IR has the same effect as
-a hypothetical SFENCE.VMA instruction in which:
-\begin{itemize}
- \item the values of {\em rs1} and {\em rs2} for the SFENCE.VMA are the same
- as those used in the SINVAL.VMA,
- \item reads and writes prior to the SFENCE.W.INVAL are considered to be those
- prior to the SFENCE.VMA, and
- \item reads and writes following the SFENCE.INVAL.IR are considered to be
- those subsequent to the SFENCE.VMA.
-\end{itemize}
-
-\vspace{-0.2in}
-\begin{center}
-\begin{tabular}{O@{}R@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{25} &
-\instbitrange{24}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{funct7} &
-\multicolumn{1}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-7 & 5 & 5 & 3 & 5 & 7 \\
-HINVAL.VVMA & asid & vaddr & PRIV & 0 & SYSTEM \\
-\end{tabular}
-\end{center}
-
-\vspace{-0.2in}
-\begin{center}
-\begin{tabular}{O@{}R@{}R@{}F@{}R@{}S}
-\\
-\instbitrange{31}{25} &
-\instbitrange{24}{20} &
-\instbitrange{19}{15} &
-\instbitrange{14}{12} &
-\instbitrange{11}{7} &
-\instbitrange{6}{0} \\
-\hline
-\multicolumn{1}{|c|}{funct7} &
-\multicolumn{1}{c|}{rs2} &
-\multicolumn{1}{c|}{rs1} &
-\multicolumn{1}{c|}{funct3} &
-\multicolumn{1}{c|}{rd} &
-\multicolumn{1}{c|}{opcode} \\
-\hline
-7 & 5 & 5 & 3 & 5 & 7 \\
-HINVAL.GVMA & vmid & gaddr & PRIV & 0 & SYSTEM \\
-\end{tabular}
-\end{center}
-
-If the hypervisor extension is implemented, the Svinval extension also provides two
-additional instructions: HINVAL.VVMA and HINVAL.GVMA. These have the same
-semantics as SINVAL.VMA, except that they combine with SFENCE.W.INVAL and
-SFENCE.INVAL.IR to replace HFENCE.VVMA and HFENCE.GVMA, respectively, instead
-of SFENCE.VMA. In addition, HINVAL.GVMA uses VMIDs instead of ASIDs.
-
-SINVAL.VMA, HINVAL.VVMA, and HINVAL.GVMA require the same permissions and raise
-the same exceptions as SFENCE.VMA, HFENCE.VVMA, and HFENCE.GVMA, respectively.
-In particular, an attempt to execute any of these instructions in
-U-mode always raises an illegal instruction exception, and an attempt
-to execute SINVAL.VMA or HINVAL.GVMA in S-mode or HS-mode when
-{\tt mstatus}.TVM=1 also raises an illegal instruction exception.
-An attempt to execute HINVAL.VVMA or HINVAL.GVMA in VS-mode or VU-mode,
-or to execute SINVAL.VMA in VU-mode, raises a virtual instruction
-exception.
-When {\tt hstatus}.VTVM=1, an attempt to execute SINVAL.VMA in VS-mode
-also raises a virtual instruction exception.
-
-\begin{commentary}
- SFENCE.W.INVAL and SFENCE.INVAL.IR instructions do not need to be trapped when
- {\tt mstatus}.TVM=1 or when {\tt hstatus}.VTVM=1, as they only have ordering
- effects but no visible side effects. Trapping of the SINVAL.VMA instruction
- is sufficient to enable emulation of the intended overall TLB maintenance
- functionality.
-
- In typical usage, software will invalidate a range of virtual addresses in
- the address-translation caches by executing an SFENCE.W.INVAL instruction,
- executing a series of SINVAL.VMA, HINVAL.VVMA, or HINVAL.GVMA instructions to
- the addresses (and optionally ASIDs or VMIDs) in question, and then executing
- an SFENCE.INVAL.IR instruction.
-
- High-performance implementations will be able to pipeline the
- address-translation cache invalidation operations, and will defer any
- pipeline stalls or other memory ordering enforcement until an SFENCE.W.INVAL,
- SFENCE.INVAL.IR, SFENCE.VMA, HFENCE.GVMA, or HFENCE.VVMA instruction is
- executed.
-
- Simpler implementations may implement SINVAL.VMA, HINVAL.VVMA, and
- HINVAL.GVMA identically to SFENCE.VMA, HFENCE.VVMA, and HFENCE.GVMA,
- respectively, while implementing SFENCE.W.INVAL and SFENCE.INVAL.IR
- instructions as no-ops.
-\end{commentary}
diff --git a/src/m-st-ext.adoc b/src/m-st-ext.adoc
index c8eac63..ac1d70b 100644
--- a/src/m-st-ext.adoc
+++ b/src/m-st-ext.adoc
@@ -61,11 +61,12 @@ include::images/wavedrom/division-op.adoc[]
DIV and DIVU perform an XLEN bits by XLEN bits signed and unsigned
integer division of _rs1_ by _rs2_, rounding towards zero. REM and REMU
provide the remainder of the corresponding division operation. For REM,
-the sign of the result equals the sign of the dividend.
+the sign of a nonzero result equals the sign of the dividend.
[NOTE]
====
-For both signed and unsigned division, it holds that
+For both signed and unsigned division, except in the case of overflow, it holds
+that
latexmath:[$\textrm{dividend} = \textrm{divisor} \times \textrm{quotient} + \textrm{remainder}$].
====
@@ -135,13 +136,13 @@ unsigned division circuit and specifying the same overflow result
simplifies the hardware.
====
-=== Zmmul Extension, Version 0.1
+=== Zmmul Extension, Version 1.0
The Zmmul extension implements the multiplication subset of the M
extension. It adds all of the instructions defined in
<<Multiplication Operations>>, namely: MUL, MULH, MULHU,
MULHSU, and (for RV64 only) MULW. The encodings are identical to those
-of the corresponding M-extension instructions.
+of the corresponding M-extension instructions. M implies Zmmul.
(((MUL, Zmmul)))
[NOTE]
diff --git a/src/machine.adoc b/src/machine.adoc
index 864b96b..640a794 100644
--- a/src/machine.adoc
+++ b/src/machine.adoc
@@ -1,9 +1,9 @@
[[machine]]
-== Machine-Level ISA, Version 1.12
+== Machine-Level ISA, Version 1.13
This chapter describes the machine-level operations available in
machine-mode (M-mode), which is the highest privilege mode in a RISC-V
-system. M-mode is used for low-level access to a hardware platform and
+hart. M-mode is used for low-level access to a hardware platform and
is the first mode entered at reset. M-mode can also be used to implement
features that are too difficult or expensive to implement in hardware
directly. The RISC-V machine-level ISA contains a common core that is
@@ -23,12 +23,11 @@ The `misa` CSR is a *WARL* read-write register reporting the ISA supported by th
.Machine ISA register (misa)
include::images/bytefield/misareg.edn[]
-The MXL (Machine XLEN) field encodes the native base integer ISA width
-as shown in <<misabase>>. The MXL field may be
-writable in implementations that support multiple base ISAs. The
-effective XLEN in M-mode, _MXLEN_, is given by the setting of MXL, or
-has a fixed value if `misa` is zero. The MXL field is always set to the
-widest supported ISA variant at reset.
+The MXL (Machine XLEN) field encodes the native base integer ISA width as
+shown in <<misabase>>. The MXL field is read-only. If `misa` is nonzero, the
+MXL field indicates the effective XLEN in M-mode, a constant termed _MXLEN_.
+XLEN is never greater than MXLEN, but XLEN might be smaller than MXLEN in
+less-privileged modes.
[[misabase]]
.Encoding of MXL field in `misa`
@@ -43,34 +42,31 @@ widest supported ISA variant at reset.
128
|===
-The `misa` CSR is MXLEN bits wide. If the value read from `misa` is
-nonzero, field MXL of that value always denotes the current MXLEN. If a
-write to `misa` causes MXLEN to change, the position of MXL moves to the
-most-significant two bits of `misa` at the new width.
+The `misa` CSR is MXLEN bits wide.
[NOTE]
====
The base width can be quickly ascertained using branches on the sign of
the returned `misa` value, and possibly a shift left by one and a second
branch on the sign. These checks can be written in assembly code without
-knowing the register width (XLEN) of the machine. The base width is
-given by __XLEN=2^MXL+4^__.
+knowing the register width (MXLEN) of the hart. The base width is
+given by __MXLEN=2^MXL+4^__.
The base width can also be found if `misa` is zero, by placing the
immediate 4 in a register then shifting the register left by 31 bits at
-a time. If zero after one shift, then the machine is RV32. If zero after
-two shifts, then the machine is RV64, else RV128.
+a time. If zero after one shift, then the hart is RV32. If zero after
+two shifts, then the hart is RV64, else RV128.
====
The Extensions field encodes the presence of the standard extensions,
with a single bit per letter of the alphabet (bit 0 encodes presence of
extension "A", bit 1 encodes presence of extension "B", through to
bit 25 which encodes "Z"). The "I" bit will be set for RV32I, RV64I,
-RV128I base ISAs, and the "E" bit will be set for RV32E. The
+and RV128I base ISAs, and the "E" bit will be set for RV32E and RV64E. The
Extensions field is a *WARL* field that can contain writable bits where the
implementation allows the supported ISA to be modified. At reset, the
Extensions field shall contain the maximal set of supported extensions,
-and I shall be selected over E if both are available.
+and "I" shall be selected over "E" if both are available.
When a standard extension is disabled by clearing its bit in `misa`, the
instructions and CSRs defined or modified by the extension revert to
@@ -91,7 +87,7 @@ clearing its bit in `misa` results in the extension being considered
_not implemented_ in M-mode. For example, setting `misa`.F=0 results in
the F extension being not implemented for M-mode, because the F
extension's instructions will not act as the Unprivileged ISA requires
-but may instead raise an illegal instruction exception.
+but may instead raise an illegal-instruction exception.
Defining the term _implemented_ based strictly on the observable
behavior might conflict with other common understandings of the same
@@ -163,7 +159,7 @@ Z
_Reserved_ +
Compressed extension +
Double-precision floating-point extension +
-RV32E base ISA +
+RV32E/64E base ISA +
Single-precision floating-point extension +
_Reserved_ +
Hypervisor extension +
@@ -180,7 +176,7 @@ _Reserved_ +
Supervisor mode implemented +
_Reserved_ +
User mode implemented +
-"V" Vector extension implemented +
+Vector extension +
_Reserved_ +
Non-standard extensions present +
_Reserved_ +
@@ -232,6 +228,13 @@ write to `misa` is suppressed, leaving `misa` unchanged.
When software enables an extension that was previously disabled, then
all state uniquely associated with that extension is UNSPECIFIED, unless otherwise specified by that extension.
+NOTE: Although one of the bits 25--0 in `misa` being set to 1 implies that
+the corresponding feature is implemented, the inverse is not necessarily
+true: one of these bits being clear does not necessarily imply that the
+corresponding feature is not implemented. This follows from the fact that,
+when a feature is not implemented, the corresponding opcodes and CSRs become
+reserved, not necessarily illegal.
+
==== Machine Vendor ID Register `mvendorid`
The `mvendorid` CSR is a 32-bit read-only register providing the JEDEC
@@ -392,7 +395,7 @@ instruction.
====
When a hart is executing in privilege mode _x_, interrupts are globally
-enabled when __x__IE=1 and globally disabled when __x__ IE=0. Interrupts for
+enabled when __x__IE=1 and globally disabled when __x__IE=0. Interrupts for
lower-privilege modes, __w__<__x__, are always globally
disabled regardless of the setting of any global __w__IE bit for the
lower-privilege mode. Interrupts for higher-privilege modes,
@@ -441,6 +444,19 @@ __y__&#8800;M, __x__RET also sets MPRV=0.
Setting __x__PP to the least-privileged supported mode on an __x__RET helps identify software bugs in the management of the two-level privilege-mode stack.
====
+[NOTE]
+====
+Trap handlers must be designed to neither enable interrupts nor cause exceptions
+during the phase of handling where the trap handler preserves the critical state
+information required to handle and resume from the trap. An exception or
+interrupt in this critical phase of trap handling may lead to a trap that can
+overwrite such critical state. This could result in the loss of data needed to
+recover from the initial trap. Further, if an exception occurs in the code path
+needed to handle traps, then such a situation may lead to an infinite loop of
+traps. To prevent this, trap handlers must be meticulously designed to identify
+and safely manage exceptions within their operational flow.
+====
+
__x__PP fields are *WARL* fields that can hold only privilege mode _x_ and any implemented privilege mode lower than _x_. If privilege mode _x_ is not implemented, then __x__PP must be read-only 0.
[NOTE]
@@ -455,25 +471,28 @@ storage bit is required to represent either 00 or 11 in MPP.
[[xlen-control]]
===== Base ISA Control in `mstatus` Register
-For RV64 systems, the SXL and UXL fields are *WARL* fields that control the
+For RV64 harts, the SXL and UXL fields are *WARL* fields that control the
value of XLEN for S-mode and U-mode, respectively. The encoding of these
fields is the same as the MXL field of `misa`, shown in
<<misabase>>. The effective XLEN in S-mode and
U-mode are termed _SXLEN_ and _UXLEN_, respectively.
-For RV32 systems, the SXL and UXL fields do not exist, and SXLEN=32 and
+When MXLEN=32, the SXL and UXL fields do not exist, and SXLEN=32 and
UXLEN=32.
-For RV64 systems, if S-mode is not supported, then SXL is read-only
+When MXLEN=64, if S-mode is not supported, then SXL is read-only
zero. Otherwise, it is a *WARL* field that encodes the current value of SXLEN.
In particular, an implementation may make SXL be a read-only field whose
value always ensures that SXLEN=MXLEN.
-For RV64 systems, if U-mode is not supported, then UXL is read-only
+When MXLEN=64, if U-mode is not supported, then UXL is read-only
zero. Otherwise, it is a *WARL* field that encodes the current value of UXLEN.
In particular, an implementation may make UXL be a read-only field whose
value always ensures that UXLEN=MXLEN or UXLEN=SXLEN.
+If S-mode is implemented, the set of legal values that the UXL field may
+assume excludes those that would cause UXLEN to be greater than SXLEN.
+
Whenever XLEN in any mode is set to a value less than the widest
supported XLEN, all operations must ignore source operand register bits
above the configured XLEN, and must sign-extend results to fill the
@@ -493,11 +512,6 @@ always be a software bug, but machine operation is well-defined even in
this case.
====
-If MXLEN is changed from 32 to a wider width, each of `mstatus` fields
-SXL and UXL, if not restricted to a single value, gets the value
-corresponding to the widest supported width not wider than the new
-MXLEN.
-
===== Memory Privilege in `mstatus` Register
The MPRV (Modify PRiVilege) bit modifies the _effective privilege mode_,
@@ -638,7 +652,7 @@ stores.
The TVM (Trap Virtual Memory) bit is a *WARL* field that supports intercepting
supervisor virtual-memory management operations. When TVM=1, attempts to
read or write the `satp` CSR or execute an SFENCE.VMA or SINVAL.VMA
-instruction while executing in S-mode will raise an illegal instruction
+instruction while executing in S-mode will raise an illegal-instruction
exception. When TVM=0, these operations are permitted in S-mode. TVM is
read-only 0 when S-mode is not supported.
@@ -659,8 +673,8 @@ instruction may execute in lower privilege modes when not prevented for
some other reason. When TW=1, then if WFI is executed in any
less-privileged mode, and it does not complete within an
implementation-specific, bounded time limit, the WFI instruction causes
-an illegal instruction exception. An implementation may have WFI always
-raise an illegal instruction exception in less-privileged modes when
+an illegal-instruction exception. An implementation may have WFI always
+raise an illegal-instruction exception in less-privileged modes when
TW=1, even if there are pending globally-disabled interrupts when the
instruction is executed. TW is read-only 0 when there are no modes less
privileged than M.
@@ -672,14 +686,14 @@ OS, rather than wastefully idling in the current guest.
====
When S-mode is implemented, then executing WFI in U-mode causes an
-illegal instruction exception, unless it completes within an
+illegal-instruction exception, unless it completes within an
implementation-specific, bounded time limit. A future revision of this
specification might add a feature that allows S-mode to selectively
permit WFI in U-mode. Such a feature would only be active when TW=0.
The TSR (Trap SRET) bit is a *WARL* field that supports intercepting the
supervisor exception return instruction, SRET. When TSR=1, attempts to
-execute SRET while executing in S-mode will raise an illegal instruction
+execute SRET while executing in S-mode will raise an illegal-instruction
exception. When TSR=0, this operation is permitted in S-mode. TSR is
read-only 0 when S-mode is not supported.
@@ -771,7 +785,7 @@ If neither the `v` registers nor S-mode is implemented, then VS is
read-only zero. If S-mode is implemented but the `v` registers are not,
VS may optionally be read-only zero.
-In systems without additional user extensions requiring new state, the
+In harts without additional user extensions requiring new state, the
XS field is read-only zero. Every additional extension with state
provides a CSR field that encodes the equivalent of the XS states. The
XS field represents a summary of all extensions' status as shown in
@@ -790,8 +804,8 @@ saving extended user context to memory. If FS, XS, and VS are all
read-only zero, then SD is also always zero.
When an extension's status is set to Off, any instruction that attempts
-to read or write the corresponding state will cause an illegal
-instruction exception. When the status is Initial, the corresponding
+to read or write the corresponding state will cause an
+illegal-instruction exception. When the status is Initial, the corresponding
state should have an initial constant value. When the status is Clean,
the corresponding state is potentially different from the initial value,
but matches the last value stored on a context swap. When the status is
@@ -834,7 +848,7 @@ cause the coprocessor state to be initialized to a constant value at
context restore, not at every unconfigure.
Executing a user-mode instruction to disable a unit and place it into
-the Off state will cause an illegal instruction exception to be raised
+the Off state will cause an illegal-instruction exception to be raised
if any subsequent instruction tries to use the unit before it is turned
back on. A user-mode instruction to turn a unit on must also ensure the
unit's state is properly initialized, as the unit might have been used
@@ -880,13 +894,16 @@ transitions for the FS, VS, or XS status bits. Note that the standard
floating-point and vector extensions do not support user-mode
unconfigure or disable/enable instructions.
-[[fsxsstates]]
-.FS, FS, and XS state transitions.
-[%autowidth,float="center",align="center",cols="<,<,<,<,<",]
+<<<
+
+[width=75,align=center,float=center,cols="<,<,<,<,<"]
|===
|Current State +
Action |Off |Initial |Clean |Dirty
+|===
+[width=75,align=center,float=center,cols="<,<,<,<,<"]
+|===
5+^|At context save in privileged code
|Save state? +
@@ -899,7 +916,10 @@ Initial
Clean
|Yes +
Clean
+|===
+[width=75,align=center,float=center,cols="<,<,<,<,<"]
+|===
5+^|At context restore in privileged code
|Restore state? +
@@ -912,7 +932,10 @@ Initial
Clean
|N/A +
N/A
+|===
+[width=75,align=center,float=center,cols="<,<,<,<,<"]
+|===
5+^|Execute instruction to read state
|Action? +
@@ -925,7 +948,10 @@ Initial
Clean
|Execute +
Dirty
+|===
+[width=75,align=center,float=center,cols="<,<,<,<,<"]
+|===
5+^|Execute instruction that possibly modifies state, including
configuration
@@ -939,7 +965,10 @@ Dirty
Dirty
|Execute +
Dirty
+|===
+[width=75,align=center,float=center,cols="<,<,<,<,<"]
+|===
5+^|Execute instruction to unconfigure unit
|Action? +
@@ -952,7 +981,10 @@ Initial
Initial
|Execute +
Initial
+|===
+[width=75,align=center,float=center,cols="<,<,<,<,<"]
+|===
5+^|Execute instruction to disable unit
|Action? +
@@ -965,7 +997,12 @@ Off
Off
|Execute +
Off
+|===
+[[fsxsstates]]
+[width=75,align=center,float=center,cols="<,<,<,<,<"]
+.FS, VS, and XS state transitions.
+|===
5+^|Execute instruction to enable unit
|Action? +
@@ -1061,7 +1098,7 @@ hand, we wish to allow flexibility for larger systems.
|Direct +
Vectored +
---
-|All exceptions set `pc` to BASE. +
+|All traps set `pc` to BASE. +
Asynchronous interrupts set `pc` to BASE+4&#215;cause. +
_Reserved_
|===
@@ -1076,14 +1113,6 @@ times the interrupt cause number. For example, a machine-mode timer
interrupt (see <<mcauses>>) causes the `pc`
to be set to BASE+`0x1c`.
-[NOTE]
-====
-When vectored interrupts are enabled, interrupt cause 0, which
-corresponds to user-mode software interrupts, are vectored to the same
-location as synchronous exceptions. This ambiguity does not arise in
-practice, since user-mode software interrupts are either disabled or
-delegated to user mode.
-====
An implementation may have different alignment constraints for different
modes. In particular, MODE=Vectored may have stricter alignment
@@ -1108,19 +1137,20 @@ appropriate level with the MRET instruction
implementations can provide individual read/write bits within `medeleg`
and `mideleg` to indicate that certain exceptions and interrupts should
be processed directly by a lower privilege level. The machine exception
-delegation register (`medeleg`) and machine interrupt delegation
-register (`mideleg`) are MXLEN-bit read/write registers.
+delegation register (`medeleg`) is a 64-bit read/write register.
+The machine interrupt delegation register (`mideleg`) is an MXLEN-bit
+read/write register.
-In systems with S-mode, the `medeleg` and `mideleg` registers must
+In harts with S-mode, the `medeleg` and `mideleg` registers must
exist, and setting a bit in `medeleg` or `mideleg` will delegate the
corresponding trap, when occurring in S-mode or U-mode, to the S-mode
-trap handler. In systems without S-mode, the `medeleg` and `mideleg`
+trap handler. In harts without S-mode, the `medeleg` and `mideleg`
registers should not exist.
[NOTE]
====
In versions 1.9.1 and earlier, these registers existed but were
-hardwired to zero in M-mode only, or M/U without N systems. There is no
+hardwired to zero in M-mode only, or M/U without N harts. There is no
reason to require they return zero in those cases, as the `misa`
register indicates whether they exist.
====
@@ -1153,11 +1183,11 @@ read-only one. Platform standards may always add such restrictions.
====
Traps never transition from a more-privileged mode to a less-privileged
-mode. For example, if M-mode has delegated illegal instruction
+mode. For example, if M-mode has delegated illegal-instruction
exceptions to S-mode, and M-mode software later executes an illegal
instruction, the trap is taken in M-mode, rather than being delegated to
S-mode. By contrast, traps may be taken horizontally. Using the same
-example, if M-mode has delegated illegal instruction exceptions to
+example, if M-mode has delegated illegal-instruction exceptions to
S-mode, and S-mode software later executes an illegal instruction, the
trap is taken in S-mode.
@@ -1177,6 +1207,10 @@ bit position equal to the value returned in the `mcause` register (i.e.,
setting bit 8 allows user-mode environment calls to be delegated to a
lower-privilege trap handler).
+When XLEN=32, `medelegh` is a 32-bit read/write register
+that aliases bits 63:32 of `medeleg`.
+Register `medelegh` does not exist when XLEN=64.
+
.Machine Interrupt Delegation Register `mideleg`.
include::images/bytefield/mideleg.adoc[]
@@ -1196,12 +1230,15 @@ MXLEN-bit read/write register containing interrupt enable bits.
Interrupt cause number _i_ (as reported in CSR `mcause`,
<<mcause>>) corresponds with bit _i_ in both `mip` and
`mie`. Bits 15:0 are allocated to standard interrupt causes only, while
-bits 16 and above are designated for platform or custom use.
+bits 16 and above are designated for platform use.
+
+NOTE: Interrupts designated for platform use may be designated for custom use
+at the platform's discretion.
.Machine Interrupt-Pending Register (mip).
include::images/bytefield/mideleg.adoc[]
-.Machine Interrupt-Pending Register (mie)
+.Machine Interrupt-Enable Register (mie)
include::images/bytefield/mideleg.adoc[]
An interrupt _i_ will trap to M-mode (causing the privilege mode to
@@ -1320,8 +1357,17 @@ the interrupt-pending and interrupt-enable bits for supervisor-level
software interrupts. SSIP is writable in `mip` and may also be set to 1
by a platform-specific interrupt controller.
+If the Sscofpmf extension is implemented, bits `mip`.LCOFIP and `mie`.LCOFIE
+are the interrupt-pending and interrupt-enable bits for local counter-overflow
+interrupts.
+LCOFIP is read-write in `mip` and reflects the occurrence of a local
+counter-overflow interrupt request resulting from any of the
+`mhpmevent__n__`.OF bits being set.
+If the Sscofpmf extension is not implemented, `mip`.LCOFIP and `mie`.LCOFIE are
+read-only zeros.
+
Multiple simultaneous interrupts destined for M-mode are handled in the
-following decreasing priority order: MEI, MSI, MTI, SEI, SSI, STI.
+following decreasing priority order: MEI, MSI, MTI, SEI, SSI, STI, LCOFI.
[NOTE]
====
@@ -1363,7 +1409,7 @@ M-mode includes a basic hardware performance-monitoring facility. The
`mcycle` CSR counts the number of clock cycles executed by the processor
core on which the hart is running. The `minstret` CSR counts the number
of instructions the hart has retired. The `mcycle` and `minstret`
-registers have 64-bit precision on all RV32 and RV64 systems.
+registers have 64-bit precision on all RV32 and RV64 harts.
The counter registers have an arbitrary value after the hart is reset,
and can be written with a given value. Any CSR write takes effect after
@@ -1400,7 +1446,7 @@ only bits 31-0; reads of the `mcycleh`, `minstreth`, and `mhpmcounternh`
CSRs return bits 63-32 of the corresponding counter, and writes change
only bits 63-32.
-.Upper 32 bits of hardware performace monitor counters, RV32 only.
+.Upper 32 bits of hardware performance monitor counters, RV32 only.
include::images/bytefield/hpmcounters.adoc[]
[[mcounteren]]
@@ -1420,16 +1466,16 @@ counters, which continue to increment even when not accessible.
When the CY, TM, IR, or HPM__n__ bit in the `mcounteren` register is
clear, attempts to read the `cycle`, `time`, `instret`, or
`hpmcountern` register while executing in S-mode or U-mode will cause an
-illegal instruction exception. When one of these bits is set, access to
+illegal-instruction exception. When one of these bits is set, access to
the corresponding register is permitted in the next implemented
privilege mode (S-mode if implemented, otherwise U-mode).
[NOTE]
====
The counter-enable bits support two common use cases with minimal
-hardware. For systems that do not need high-performance timers and
+hardware. For harts that do not need high-performance timers and
counters, machine-mode software can trap accesses and implement all
-features in software. For systems that need high-performance timers and
+features in software. For harts that need high-performance timers and
counters but are not concerned with obfuscating the underlying hardware
counters, the counters can be directly exposed to lower privilege modes.
====
@@ -1450,10 +1496,10 @@ loads to the memory-mapped `mtime` register, or emulate this
functionality on behalf of less-privileged modes in M-mode software.
====
-In systems with U-mode, the `mcounteren` must be implemented, but all
+In harts with U-mode, the `mcounteren` must be implemented, but all
fields are *WARL* and may be read-only zero, indicating reads to the
-corresponding counter will cause an illegal instruction exception when
-executing in a less-privileged mode. In systems without U-mode, the
+corresponding counter will cause an illegal-instruction exception when
+executing in a less-privileged mode. In harts without U-mode, the
`mcounteren` register should not exist.
==== Machine Counter-Inhibit CSR (`mcountinhibit`)
@@ -1585,8 +1631,8 @@ table.
***
-We do not distinguish privileged instruction exceptions from illegal
-opcode exceptions. This simplifies the architecture and also hides
+We do not distinguish privileged instruction exceptions from
+illegal-instruction exceptions. This simplifies the architecture and also hides
details of which higher-privilege instructions are supported by an
implementation. The privilege level servicing the trap can implement a
policy on whether these need to be distinguished, and if so, whether a
@@ -1643,10 +1689,16 @@ Supervisor external interrupt +
_Reserved_ +
Machine external interrupt
|1 +
+1 +
+1 +
1
-|12-15 +
+|12 +
+13 +
+14-15 +
&#8805;16
|_Reserved_ +
+Counter-overflow interrupt +
+_Reserved_ +
_Designated for platform use_
|0 +
0 +
@@ -1685,7 +1737,10 @@ _Designated for platform use_
13 +
14 +
15 +
-16-23 +
+16-17 +
+18 +
+19 +
+20-23 +
24-31 +
32-47 +
48-63 +
@@ -1707,6 +1762,9 @@ Load page fault +
_Reserved_ +
Store/AMO page fault +
_Reserved_ +
+Software check +
+Hardware error +
+_Reserved_ +
_Designated for custom use_ +
_Reserved_ +
_Designated for custom use_ +
@@ -1781,6 +1839,29 @@ an instruction. Therefore, these exceptions have lower priority than
other instruction address exceptions.
====
+[NOTE]
+====
+A Software Check exception is a synchronous exception that is triggered when
+there are violations of checks and assertions defined by ISA extensions that
+aim to safeguard the integrity of software assets, including e.g. control-flow
+and memory-access constraints. When this exception is raised, the `__x__tval`
+register is set either to 0 or to an informative value defined by the extension
+that stipulated the exception be raised. The priority of this exception,
+relative to other synchronous exceptions, depends on the cause of this exception
+and is defined by the extension that stipulated the exception be raised.
+
+A Hardware Error exception is a synchronous exception triggered when corrupted or
+uncorrectable data is accessed explicitly or implicitly by an instruction. In
+this context, "data" encompasses all types of information used within a RISC-V
+hart. Upon a hardware error exception, the `__x__epc` register is set to the
+address of the instruction that attempted to access corrupted data, while the
+`__x__tval` register is set either to 0 or to the virtual address of an
+instruction fetch, load, or store that attempted to access corrupted data. The
+priority of the Hardware Error exception is implementation-defined, but any given
+occurrence is generally expected to be recognized at the point in the overall
+priority order at which the hardware error is discovered.
+====
+
==== Machine Trap Value Register (`mtval`)
The `mtval` register is an MXLEN-bit read-write register formatted as
@@ -1789,8 +1870,10 @@ M-mode, `mtval` is either set to zero or written with exception-specific
information to assist software in handling the trap. Otherwise, `mtval`
is never written by the implementation, though it may be explicitly
written by software. The hardware platform will specify which exceptions
-must set `mtval` informatively and which may unconditionally set it to
-zero. If the hardware platform specifies that no exceptions set `mtval`
+must set `mtval` informatively, which may unconditionally set it to
+zero, and which may exhibit either behavior, depending on the underlying event
+that caused the exception.
+If the hardware platform specifies that no exceptions set `mtval`
to a nonzero value, then `mtval` is read-only zero.
If `mtval` is written with a nonzero value when a breakpoint,
@@ -1814,13 +1897,13 @@ contain the virtual address of the portion of the access that caused the
fault.
If `mtval` is written with a nonzero value when an instruction
-access-fault or page-fault exception occurs on a system with
+access-fault or page-fault exception occurs on a hart with
variable-length instructions, then `mtval` will contain the virtual
address of the portion of the instruction that caused the fault, while
`mepc` will point to the beginning of the instruction.
The `mtval` register can optionally also be used to return the faulting
-instruction bits on an illegal instruction exception (`mepc` points to
+instruction bits on an illegal-instruction exception (`mepc` points to
the faulting instruction in memory). If `mtval` is written with a
nonzero value when an illegal-instruction exception occurs, then `mtval`
will contain the shortest of:
@@ -1882,8 +1965,8 @@ and their configuration.
include::images/bytefield/mconfigptrreg.adoc[]
-The pointer alignment in bits must be no smaller than the greatest
-supported MXLEN: i.e., if the greatest supported MXLEN is
+The pointer alignment in bits must be no smaller than MXLEN:
+i.e., if MXLEN is
latexmath:[$8\times n$], then `mconfigptr`[latexmath:[$\log_2n$]-1:0]
must be zero.
@@ -1905,6 +1988,7 @@ of a memory-mapped register that is programmed by the platform or by
M-mode software towards the beginning of the boot process.
====
+[[sec:menvcfg]]
==== Machine Environment Configuration Register (`menvcfg`)
The `menvcfg` CSR is a 64-bit read/write register, formatted
@@ -1961,6 +2045,31 @@ implemented, PBMTE is read-only zero. Furthermore, for implementations
with the hypervisor extension, `henvcfg`.PBMTE is read-only zero if
`menvcfg`.PBMTE is zero.
+After changing `menvcfg`.PBMTE, executing an SFENCE.VMA instruction with
+_rs1_=`x0` and _rs2_=`x0` suffices to synchronize address-translation caches
+with respect to the altered interpretation of page-table entries' PBMT fields.
+See <<hyp-mm-fences>> for additional synchronization requirements when the
+hypervisor extension is implemented.
+
+If the Svadu extension is implemented, the ADUE bit controls whether hardware
+updating of PTE A/D bits is enabled for S-mode and G-stage address
+translations.
+When ADUE=1, hardware updating of PTE A/D bits is enabled during S-mode
+address translation, and the implementation behaves as though the Svade
+extension were not implemented for S-mode address translation.
+When the hypervisor extension is implemented, if ADUE=1, hardware updating of
+PTE A/D bits is enabled during G-stage address translation, and the
+implementation behaves as though the Svade extension were not implemented for
+G-stage address translation.
+When ADUE=0, the implementation behaves as though Svade were implemented for
+S-mode and G-stage address translation.
+If Svadu is not implemented, ADUE is read-only zero.
+Furthermore, for implementations with the hypervisor extension, `henvcfg`.ADUE
+is read-only zero if `menvcfg`.ADUE is zero.
+
+NOTE: The Svade extension requires page-fault exceptions be raised when PTE
+A/D bits need be set, hence Svade is implemented when ADUE=0.
+
The definition of the STCE field will be furnished by the forthcoming
Sstc extension. Its allocation within `menvcfg` may change prior to the
ratification of that extension.
@@ -1973,6 +2082,10 @@ The definitions of the CBCFE and CBIE fields will be furnished by the
forthcoming Zicbom extension. Their allocations within `menvcfg` may
change prior to the ratification of that extension.
+The definition of the PMM field will be furnished by the forthcoming
+Smnpm extension. Its allocation within `menvcfg` may change prior to the
+ratification of that extension.
+
When XLEN=32, `menvcfgh` is a 32-bit read/write register
that aliases bits 63:32 of `menvcfg`.
Register `menvcfgh` does not exist when XLEN=64.
@@ -1997,6 +2110,10 @@ The definitions of the RLB, MMWP, and MML fields will be furnished by
the forthcoming PMP-enhancement extension, Smepmp. Their allocations
within `mseccfg` may change prior to the ratification of that extension.
+The definition of the PMM field will be furnished by the forthcoming
+Smmpm extension. Its allocation within `mseccfg` may change prior to the
+ratification of that extension.
+
When XLEN=32 only, `mseccfgh` is a 32-bit read/write register that
aliases bits 63:32 of `mseccfg`.
Register `mseccfgh` does not exist when XLEN=64.
@@ -2133,8 +2250,8 @@ include::images/wavedrom/trap-return.adoc[]
To return after handling a trap, there are separate trap return
instructions per privilege level, MRET and SRET. MRET is always
provided. SRET must be provided if supervisor mode is supported, and
-should raise an illegal instruction exception otherwise. SRET should
-also raise an illegal instruction exception when TSR=1 in `mstatus`, as
+should raise an illegal-instruction exception otherwise. SRET should
+also raise an illegal-instruction exception when TSR=1 in `mstatus`, as
described in <<virt-control>>. An __x__RET instruction
can be executed in privilege mode _x_ or higher, where executing a
lower-privilege __x__RET instruction will pop the relevant lower-privilege
@@ -2156,13 +2273,13 @@ impossible to single-step through LR/SC sequences using a debugger.
[[wfi]]
==== Wait for Interrupt
-The Wait for Interrupt instruction (WFI) provides a hint to the
+The Wait for Interrupt instruction (WFI) informs the
implementation that the current hart can be stalled until an interrupt
might need servicing. Execution of the WFI instruction can also be used
to inform the hardware platform that suitable interrupts should
preferentially be routed to this hart. WFI is available in all
privileged modes, and optionally available to U-mode. This instruction
-may raise an illegal instruction exception when TW=1 in `mstatus`, as
+may raise an illegal-instruction exception when TW=1 in `mstatus`, as
described in <<virt-control>>.
include::images/wavedrom/wfi.adoc[]
@@ -2179,9 +2296,9 @@ return from the trap handler will execute code after the WFI
instruction.
====
-The purpose of the WFI instruction is to provide a hint to the
-implementation, and so a legal implementation is to simply implement WFI
-as a NOP.
+Implementations are permitted to resume execution for any reason, even if an
+enabled interrupt has not become pending. Hence, a legal implementation is to
+simply implement the WFI instruction as a NOP.
[NOTE]
====
@@ -2249,8 +2366,8 @@ include::images/bytefield/cust-sys-instr.adoc[]
Upon reset, a hart’s privilege mode is set to M. The `mstatus` fields
MIE and MPRV are reset to 0. If little-endian memory accesses are
supported, the `mstatus`/`mstatush` field MBE is reset to 0. The `misa`
-register is reset to enable the maximal set of supported extensions and
-widest MXLEN, as described in <<misa>>. For
+register is reset to enable the maximal set of supported extensions,
+as described in <<misa>>. For
implementations with the "A" standard extension, there is no valid
load reservation. The `pc` is set to an implementation-defined reset
vector. The `mcause` register is set to a value indicating the cause of
@@ -2259,7 +2376,7 @@ the platform mandates a different reset value for some PMP registers’ A
and L fields. If the hypervisor extension is implemented, the
`hgatp`.MODE and `vsatp`.MODE fields are reset to 0. If the Smrnmi
extension is implemented, the `mnstatus`.NMIE field is reset to 0. No
- *WARL* field contains an illegal value. All other hart state is .
+ *WARL* field contains an illegal value. All other hart state is UNSPECIFIED.
The `mcause` values after reset have implementation-specific
interpretation, but the value 0 should be returned on implementations
@@ -2509,39 +2626,37 @@ should provide alternative fall-back mechanisms used when lack of
progress is detected.
====
-===== Alignment
-
-Memory regions that support aligned LR/SC or aligned AMOs might also
-support misaligned LR/SC or misaligned AMOs for some addresses and
-access widths. If, for a given address and access width, a misaligned
-LR/SC or AMO generates an address-misaligned exception, then _all_
-loads, stores, LRs/SCs, and AMOs using that address and access width
-must generate address-misaligned exceptions.
-
-[NOTE]
-====
-The standard "A" extension does not support misaligned AMOs or LR/SC
-pairs. Support for misaligned AMOs is provided by the standard "Zam"
-extension. Support for misaligned LR/SC sequences is not currently
-standardized, so LR and SC to misaligned addresses must raise an
-exception.
-
-Mandating that misaligned loads and stores raise address-misaligned
-exceptions wherever misaligned AMOs raise address-misaligned exceptions
-permits the emulation of misaligned AMOs in an M-mode trap handler. The
-handler guarantees atomicity by acquiring a global mutex and emulating
-the access within the critical section. Provided that the handler for
-misaligned loads and stores uses the same mutex, all accesses to a given
-address that use the same word size will be mutually atomic.
-====
-
+==== Misaligned Atomicity Granule PMA
+
+The misaligned atomicity granule PMA provides constrained support for
+misaligned AMOs.
+This PMA, if present, specifies the size of a _misaligned atomicity granule_,
+a naturally aligned power-of-two number of bytes.
+Specific supported values for this PMA are represented by MAG__NN__, e.g.,
+MAG16 indicates the misaligned atomicity granule is at least 16 bytes.
+
+The misaligned atomicity granule PMA applies only to AMOs, loads and stores
+defined in the base ISAs, and loads and stores of no more than MXLEN bits
+defined in the F, D, and Q extensions.
+For an instruction in that set, if all accessed bytes lie within the same
+misaligned atomicity granule, the instruction will not raise an exception for
+reasons of address alignment, and the instruction will give rise to only one
+memory operation for the purposes of RVWMO--i.e., it will execute atomically.
+
+If a misaligned AMO accesses a region that does not specify a misaligned
+atomicity granule PMA, or if not all accessed bytes lie within the same
+misaligned atomicity granule, then an exception is raised.
+For regular loads and stores that access such a region or for which not all
+accessed bytes lie within the same misaligned atomicity granule, either an
+exception is raised, or the access proceeds but is not guaranteed to be atomic.
Implementations may raise access-fault exceptions instead of
-address-misaligned exceptions for some misaligned accesses, indicating
-the instruction should not be emulated by a trap handler. If, for a
-given address and access width, all misaligned LRs/SCs and AMOs generate
-access-fault exceptions, then regular misaligned loads and stores using
-the same address and access width are not required to execute
-atomically.
+address-misaligned exceptions for some misaligned accesses, indicating the
+instruction should not be emulated by a trap handler.
+
+NOTE: LR/SC instructions are unaffected by this PMA and so always raise an
+exception when misaligned. Vector memory accesses are also unaffected, so
+might execute non-atomically even when contained within a misaligned atomicity
+granule. Implicit accesses are similarly unaffected by this PMA.
==== Memory-Ordering PMAs
@@ -2782,7 +2897,7 @@ illegal.
[NOTE]
====
-RV64 systems use `pmpcfg2`, rather than `pmpcfg1`, to hold
+RV64 harts use `pmpcfg2`, rather than `pmpcfg1`, to hold
configurations for PMP entries 8-15. This design reduces the cost of
supporting multiple MXLEN values, since the configurations for PMP
entries 8-11 appear in `pmpcfg2`[31:0] for both RV32 and RV64.
@@ -2844,13 +2959,6 @@ store-conditional, or AMO instruction which accesses a physical address
within a PMP region without write permissions raises a store
access-fault exception.
-If MXLEN is changed, the contents of the `pmpxcfg` fields are preserved,
-but appear in the `pmpcfgy` CSR prescribed by the new setting of MXLEN.
-For example, when MXLEN is changed from 64 to 32, `pmp4cfg` moves from
-`pmpcfg0`[39:32] to `pmpcfg1`[7:0]. The `pmpaddr` CSRs follow the usual
-CSR width modulation rules described in
-<<csrwidthmodulation>>.
-
===== Address Matching
The A field in a PMP entry's configuration register encodes the
@@ -3020,9 +3128,8 @@ fetches may also be decomposed into multiple accesses, some of which may
succeed before an access-fault exception occurs. In particular, a
portion of a misaligned store that passes the PMP check may become
visible, even if another portion fails the PMP check. The same behavior
-may manifest for floating-point stores wider than XLEN bits (e.g., the
-FSD instruction in RV32D), even when the store address is naturally
-aligned.
+may manifest for stores wider than XLEN bits (e.g., the FSD instruction
+in RV32D), even when the store address is naturally aligned.
[[pmp-vmem]]
==== Physical Memory Protection and Paging
@@ -3047,6 +3154,8 @@ are modified, M-mode software must synchronize the PMP settings with the
virtual memory system and any PMP or address-translation caches. This is
accomplished by executing an SFENCE.VMA instruction with _rs1_=`x0` and
_rs2_=`x0`, after the PMP CSRs are written.
+See <<hyp-mm-fences>> for additional synchronization requirements when the
+hypervisor extension is implemented.
If page-based virtual memory is not implemented, memory accesses check
the PMP settings synchronously, so no SFENCE.VMA is needed.
diff --git a/src/mm-alloy.adoc b/src/mm-alloy.adoc
index 93c0c41..1352fad 100644
--- a/src/mm-alloy.adoc
+++ b/src/mm-alloy.adoc
@@ -2,7 +2,7 @@
== Formal Axiomatic Specification in Alloy
We present a formal specification of the RVWMO memory model in Alloy
-(http://alloy.mit.edu). This model is available online at
+(https://alloytools.org/). This model is available online at
https://github.com/daniellustig/riscv-memory-model.
The online material also contains some litmus tests and some examples of
diff --git a/src/mm-eplan.adoc b/src/mm-eplan.adoc
index 1243b1d..470a3ab 100644
--- a/src/mm-eplan.adoc
+++ b/src/mm-eplan.adoc
@@ -922,7 +922,7 @@ instruction will be followed by a conditional branch checking whether
the outcome was successful; this implies that there will be a control
dependency from the store operation generated by the SC instruction to
any memory operations following the branch. PPO
-rule <<ppo-ctrl>> in turn implies that any subsequent store
+rule <<ppo, 11>> in turn implies that any subsequent store
operations will appear later in the global memory order than the store
operation generated by the SC. However, since control, address, and data
dependencies are defined over memory operations, and since an
diff --git a/src/mm-formal.adoc b/src/mm-formal.adoc
index 648e21a..fb89914 100644
--- a/src/mm-formal.adoc
+++ b/src/mm-formal.adoc
@@ -525,7 +525,7 @@ a construction of the post-transition model state for each.
Transitions for all instructions:
-latexmath:[$\bullet$] <<fetch, Fetch instruction>>: This transition represents a fetch and decode of a new instruction instance, as a program order successor of a previously fetched
+* <<fetch, Fetch instruction>>: This transition represents a fetch and decode of a new instruction instance, as a program order successor of a previously fetched
instruction instance (or the initial fetch address).
The model assumes the instruction memory is fixed; it does not describe
@@ -534,16 +534,17 @@ not generate memory load operations, and the shared memory is not
involved in the transition. Instead, the model depends on an external
oracle that provides an opcode when given a memory location.
-latexmath:[$\circ$] <<reg_write, Register write>>: This is a write of a register value.
+[circle]
+* <<reg_write, Register write>>: This is a write of a register value.
-latexmath:[$\circ$] <<reg_read, Register read>>: This is a read of a register value from the most recent
+* <<reg_read, Register read>>: This is a read of a register value from the most recent
program-order-predecessor instruction instance that writes to that
register.
-latexmath:[$\circ$] <<sail_interp, Pseudocode internal step>>: This covers pseudocode internal computation: arithmetic, function
+* <<sail_interp, Pseudocode internal step>>: This covers pseudocode internal computation: arithmetic, function
calls, etc.
-latexmath:[$\circ$] <<finish, Finish instruction>>: At this point the instruction pseudocode is done, the instruction cannot be restarted, memory accesses cannot be discarded, and all memory
+* <<finish, Finish instruction>>: At this point the instruction pseudocode is done, the instruction cannot be restarted, memory accesses cannot be discarded, and all memory
effects have taken place. For conditional branch and indirect jump
instructions, any program order successors that were fetched from an
address that is not the one that was written to the _pc_ register are
@@ -552,15 +553,20 @@ them.
Transitions specific to load instructions:
-latexmath:[$\circ$] <<initiate_load, Initiate memory load operations>>: At this point the memory footprint of the load instruction is
+[circle]
+* <<initiate_load, Initiate memory load operations>>: At this point the memory footprint of the load instruction is
provisionally known (it could change if earlier instructions are
restarted) and its individual memory load operations can start being
satisfied.
-latexmath:[$\bullet$] <<sat_from_forwarding, Satisfy memory load operation by forwarding from unpropogated stores>>: This partially or entirely satisfies a single memory load operation
-by forwarding, from program-order-previous memory store operations.
-latexmath:[$\bullet$] <<sat_from_mem, Satisfy memory load operation from memory>>: This entirely satisfies the outstanding slices of a single memory
+
+[disc]
+* <<sat_from_forwarding, Satisfy memory load operation by forwarding from unpropagated stores>>: This partially or entirely satisfies a single memory load operation by forwarding, from program-order-previous memory store operations.
+
+* <<sat_from_mem, Satisfy memory load operation from memory>>: This entirely satisfies the outstanding slices of a single memory
load operation, from memory.
-latexmath:[$\circ$] <<complete_loads, Complete load operations>>: At this point all the memory load operations of the instruction have
+
+[circle]
+* <<complete_loads, Complete load operations>>: At this point all the memory load operations of the instruction have
been entirely satisfied and the instruction pseudocode can continue
executing. A load instruction can be subject to being restarted until
the transition. But, under some conditions, the model might treat a load
@@ -568,44 +574,56 @@ instruction as non-restartable even before it is finished (e.g. see ).
Transitions specific to store instructions:
-latexmath:[$\circ$] <<initiate_store_footprint, Initiate memory store operation footprints>>: At this point the memory footprint of the store is provisionally
+[circle]
+* <<initiate_store_footprint, Initiate memory store operation footprints>>: At this point the memory footprint of the store is provisionally
known.
-latexmath:[$\circ$] <<instantiate_store_value, Instantiate memory store operation values>>: At this point the memory store operations have their values and
+
+* <<instantiate_store_value, Instantiate memory store operation values>>: At this point the memory store operations have their values and
program-order-successor memory load operations can be satisfied by
forwarding from them.
-latexmath:[$\circ$] <<commit_stores, Commit store instruction>>: At this point the store operations are guaranteed to happen (the
+
+* <<commit_stores, Commit store instruction>>: At this point the store operations are guaranteed to happen (the
instruction can no longer be restarted or discarded), and they can start
being propagated to memory.
-latexmath:[$\bullet$] <<prop_store, Propagate store operation>>: This propagates a single memory store operation to memory.
-latexmath:[$\circ$] <<complete_stores, Complete store operations>>: At this point all the memory store operations of the instruction
+
+[disc]
+* <<prop_store, Propagate store operation>>: This propagates a single memory store operation to memory.
+
+[circle]
+* <<complete_stores, Complete store operations>>: At this point all the memory store operations of the instruction
have been propagated to memory, and the instruction pseudocode can
continue executing.
Transitions specific to `sc` instructions:
-latexmath:[$\bullet$] <<early_sc_fail, Early sc fail>>: This causes the `sc` to fail, either a spontaneous fail or because
-it is not paired with a program-order-previous `lr`.
-latexmath:[$\bullet$] <<paired_sc, Paired sc>>: This transition indicates the `sc` is paired with an `lr` and might
+[disc]
+* <<early_sc_fail, Early sc fail>>: This causes the `sc` to fail, either a spontaneous fail or because it is not paired with a program-order-previous `lr`.
+
+* <<paired_sc, Paired sc>>: This transition indicates the `sc` is paired with an `lr` and might
succeed.
-latexmath:[$\bullet$] <<commit_sc, Commit and propagate store operation of an sc>>: This is an atomic execution of the transitions <<commit_stores, Commit store instruction>> and <<prop_store, Propagate store operation>>, it is enabled
+
+* <<commit_sc, Commit and propagate store operation of an sc>>: This is an atomic execution of the transitions <<commit_stores, Commit store instruction>> and <<prop_store, Propagate store operation>>, it is enabled
only if the stores from which the `lr` read have not been
overwritten.
-latexmath:[$\bullet$] <<late_sc_fail, Late sc fail>>: This causes the `sc` to fail, either a spontaneous fail or because
+
+* <<late_sc_fail, Late sc fail>>: This causes the `sc` to fail, either a spontaneous fail or because
the stores from which the `lr` read have been overwritten.
Transitions specific to AMO instructions:
-latexmath:[$\bullet$] <<do_amo, Satisfy, commit and propagate operations of an AMO>>: This is an atomic execution of all the transitions needed to satisfy
+[disc]
+* <<do_amo, Satisfy, commit and propagate operations of an AMO>>: This is an atomic execution of all the transitions needed to satisfy
the load operation, do the required arithmetic, and propagate the store
operation.
Transitions specific to fence instructions:
-latexmath:[$\circ$] <<commit_fence, Commit fence>>
+[circle]
+* <<commit_fence, Commit fence>>
The transitions labeled latexmath:[$\circ$] can always be taken eagerly,
as soon as their precondition is satisfied, without excluding other
-behavior; the latexmath:[$\bullet$] cannot. Although is marked with a
+behavior; the latexmath:[$\bullet$] cannot. Although <<fetch, Fetch instruction>> is marked with a
latexmath:[$\bullet$], it can be taken eagerly as long as it is not
taken infinitely many times.
@@ -1214,7 +1232,7 @@ time if:
. every memory store operation that has been forwarded to
latexmath:[$i'$] is propagated;
. the conditions of <<commit_stores, Commit store instruction>> are satisfied;
-. the conditions of <<prop_stores, Commit store instruction>> is satisfied (notice that an `sc` instruction can
+. the conditions of <<prop_store, Propagate store operation>> are satisfied (notice that an `sc` instruction can
only have one memory store operation); and
. for every store slice latexmath:[$msos$] from latexmath:[$msoss$],
latexmath:[$msos$] has not been overwritten, in the shared memory, by a
@@ -1224,7 +1242,7 @@ since latexmath:[$msos$] was propagated to memory.
Action:
. apply the actions of <<commit_stores, Commit store instruction>>; and
-. apply the action of <<prop_stores, Commit store instruction>>.
+. apply the action of <<prop_store, Propagate store operation>>.
[[late_sc_fail]]
===== Late `sc` fail
@@ -1403,7 +1421,7 @@ _instruction_tree_; and
==== Limitations
* The model covers user-level RV64I and RV64A. In particular, it does
-not support the misaligned atomics extension "Zam" or the total store
+not support the misaligned atomicity granule PMA or the total store
ordering extension "Ztso". It should be trivial to adapt the model to
RV32I/A and to the G, Q and C extensions, but we have never tried it.
This will involve, mostly, writing Sail code for the instructions, with
diff --git a/src/naming.adoc b/src/naming.adoc
index 3898948..f597733 100644
--- a/src/naming.adoc
+++ b/src/naming.adoc
@@ -17,7 +17,7 @@ The ISA naming strings are case insensitive.
=== Base Integer ISA
-RISC-V ISA strings begin with either RV32I, RV32E, RV64I, or RV128I
+RISC-V ISA strings begin with either RV32I, RV32E, RV64I, RV64E, or RV128I
indicating the supported address space size in bits for the base integer
ISA.
@@ -41,7 +41,7 @@ e.g., "Q" for quad-precision floating-point, or "C" for the 16-bit
compressed instruction format.
Some ISA extensions depend on the presence of other extensions, e.g.,
-"D" depends on "F" and "F" depends on "Zicsr". These dependences
+"D" depends on "F" and "F" depends on "Zicsr". These dependencies
may be implicit in the ISA name: for example, RV32IF is equivalent to
RV32IFZicsr, and RV32ID is equivalent to RV32IFD and RV32IFDZicsr.
@@ -84,32 +84,47 @@ an alphabetical name and an optional version number. For example,
The first letter following the "Z" conventionally indicates the most
closely related alphabetical extension category, IMAFDQCVH. For the
-"Zam" extension for misaligned atomics, for example, the letter "a"
-indicates the extension is related to the "A" standard extension. If
+"Zfa" extension for additional floating-point instructions, for example, the letter "f"
+indicates the extension is related to the "F" standard extension. If
multiple "Z" extensions are named, they should be ordered first by
category, then alphabetically within a category—for example,
"Zicsr_Zifencei_Zam".
-Extensions with the "Z" prefix must be separated from other
-multi-letter extensions by an underscore, e.g.,
+All multi-letter extensions, including those with the "Z" prefix, must be
+separated from other multi-letter extensions by an underscore, e.g.,
"RV32IMACZicsr_Zifencei".
=== Supervisor-level Instruction-Set Extensions
-Standard supervisor-level instruction-set extensions are defined in
-Volume II, but are named using "S" as a prefix, followed by an
-alphabetical name and an optional version number. Supervisor-level
-extensions must be separated from other multi-letter extensions by an
-underscore.
+Standard extensions that extend the supervisor-level virtual-memory
+architecture are prefixed with the letters "Sv", followed by an alphabetical
+name and an optional version number, or by a numeric name with no version number.
+Other standard extensions that extend
+the supervisor-level architecture are prefixed with the letters "Ss",
+followed by an alphabetical name and an optional version number. Such
+extensions are defined in Volume II.
Standard supervisor-level extensions should be listed after standard
unprivileged extensions. If multiple supervisor-level extensions are
listed, they should be ordered alphabetically.
+=== Hypervisor-level Instruction-Set Extensions
+
+Standard extensions that extend the hypervisor-level architecture are prefixed
+with the letters "Sh".
+If multiple hypervisor-level extensions are listed, they should be ordered
+alphabetically.
+
+NOTE: Many augmentations to the hypervisor-level architecture are more
+naturally defined as supervisor-level extensions, following the scheme
+described in the previous section.
+The "Sh" prefix is used by the few hypervisor-level extensions that have no
+supervisor-visible effects.
+
=== Machine-level Instruction-Set Extensions
Standard machine-level instruction-set extensions are prefixed with the
-three letters "Zxm".
+letters "Sm".
Standard machine-level extensions should be listed after standard
lesser-privileged extensions. If multiple machine-level extensions are
@@ -122,9 +137,10 @@ alphabetical name and an optional version number. For example,
"Xhwacha" names the Hwacha vector-fetch ISA extension; "Xhwacha2"
and "Xhwacha2p0" name version 2.0 of same.
-Non-standard extensions must be listed after all standard extensions.
-They must be separated from other multi-letter extensions by an
-underscore. For example, an ISA with non-standard extensions Argle and
+Non-standard extensions must be listed after all standard extensions, and,
+like other multi-letter extensions, must be separated from other multi-letter
+extensions by an underscore.
+For example, an ISA with non-standard extensions Argle and
Bargle may be named "RV64IZifencei_Xargle_Xbargle".
If multiple non-standard extensions are listed, they should be ordered
@@ -152,7 +168,7 @@ e.g., RV32IMACV is legal, whereas RV32IMAVC is not.
3+|*Standard Unprivileged Extensions*
-|Integer Multiplication and Division |M |
+|Integer Multiplication and Division |M |Zmmul
|Atomics |A |
@@ -176,19 +192,17 @@ e.g., RV32IMACV is legal, whereas RV32IMAVC is not.
|Instruction-Fetch Fence |Zifencei |
-|Misaligned Atomics |Zam |A
-
|Total Store Ordering |Ztso |
3+|*Standard Supervisor-Level Extensions*
-|Supervisor-level extension "def" |Sdef |
+|Supervisor-level extension "def" |Ssdef |
3+|*Standard Machine-Level Extensions*
-|Machine-level extension "jkl" |Zxmjkl |
+|Machine-level extension "jkl" |Smjkl |
3+|*Non-Standard Extensions*
|Non-standard extension "mno" |Xmno |
-|=== \ No newline at end of file
+|===
diff --git a/src/priv-csrs.adoc b/src/priv-csrs.adoc
index ae31ade..43509db 100644
--- a/src/priv-csrs.adoc
+++ b/src/priv-csrs.adoc
@@ -42,10 +42,12 @@ accesses to be intercepted. This change should be transparent to the
less-privileged software.
====
-Attempts to access a non-existent CSR raise an illegal instruction
-exception. Attempts to access a CSR without appropriate privilege level
-or to write a read-only register also raise illegal instruction
-exceptions. A read/write register might also contain some bits that are
+Instructions that access a non-existent CSR are reserved.
+Attempts to access a CSR without appropriate privilege level
+raise illegal-instruction exceptions or, as described in
+<<sec:hcauses>>, virtual-instruction exceptions.
+Attempts to write a read-only register raise illegal-instruction exceptions.
+A read/write register might also contain some bits that are
read-only, in which case writes to the read-only bits are ignored.
<<csrrwpriv>> also indicates the convention to
@@ -56,7 +58,7 @@ standard extensions.
Machine-mode standard read-write CSRs `0x7A0`-`0x7BF` are reserved for
use by the debug system. Of these CSRs, `0x7A0`-`0x7AF` are accessible
to machine mode, whereas `0x7B0`-`0x7BF` are only visible to debug mode.
-Implementations should raise illegal instruction exceptions on
+Implementations should raise illegal-instruction exceptions on
machine-mode access to the latter set of registers.
[NOTE]
@@ -80,204 +82,52 @@ Note that not all registers are required on all implementations.
[[csrrwpriv]]
.Allocation of RISC-V CSR address ranges.
-[%autowidth,float="center",align="center",cols="^,^,^,^,<"]
+[%autowidth,float="center",align="center",cols="^,^,^,^,<,<,<,<"]
|===
-3+|CSR Address .2+|Hex .2+|Use and Accessibility
+3+^|CSR Address 2.2+|Hex 3.2+|Use and Accessibility
|[11:10] |[9:8] |[7:4]
-5+|Unprivileged and User-Level CSRs
-|`00` +
-`01` +
-`10` +
-`11` +
-`11` +
-`11`
-|`00` +
-`00` +
-`00` +
-`00` +
-`00` +
-`00`
-|`XXXX` +
-`XXXX` +
-`XXXX` +
-`0XXX` +
-`10XX` +
-`11XX`
-|`0x000-0x0FF` +
-`0x400-0x4FF` +
-`0x800-0x8FF` +
-`0xC00-0xC7F` +
-`0xC80-0xCBF` +
-`0xCC0-0xCFF`
-|Standard read/write +
-Standard read/write +
-Custom read/write +
-Standard read-only +
-Standard read-only +
-Custom read-only
-5+|Supervisor-Level CSRs
-|`00` +
-`01` +
-`01` +
-`01` +
-`10` +
-`10` +
-`10` +
-`11` +
-`11` +
-`11`
-|`01` +
-`01` +
-`01` +
-`01` +
-`01` +
-`01` +
-`01` +
-`01` +
-`01` +
-`01`
-|`XXXX` +
-`0XXX` +
-`10XX` +
-`11XX` +
-`0XXX` +
-`10XX` +
-`11XX` +
-`0XXX` +
-`10XX` +
-`11XX`
-|`0x100-0x1FF` +
-`0x500-0x57F` +
-`0x580-0x5BF` +
-`0x5C0-0x5FF` +
-`0x900-0x97F` +
-`0x980-0x9BF` +
-`0x9C0-0x9FF` +
-`0xD00-0xD7F` +
-`0xD80-0xDBF` +
-`0xDC0-0xDFF`
-|Standard read/write +
-Standard read/write +
-Standard read/write +
-Custom read/write +
-Standard read/write +
-Standard read/write +
-Custom read/write +
-Standard read-only +
-Standard read-only +
-Custom read-only
-5+|Hypervisor and VS CSRs
-|`00` +
-`01` +
-`01` +
-`01` +
-`10` +
-`10` +
-`10` +
-`11` +
-`11` +
-`11`
-|`10` +
-`10` +
-`10` +
-`10` +
-`10` +
-`10` +
-`10` +
-`10` +
-`10` +
-`10`
-|`XXXX` +
-`0XXX` +
-`10XX` +
-`11XX` +
-`0XXX` +
-`10XX` +
-`11XX` +
-`0XXX` +
-`10XX` +
-`11XX`
-|`0x200-0x2FF` +
-`0x600-0x67F` +
-`0x680-0x6BF` +
-`0x6C0-0x6FF` +
-`0xA00-0xA7F` +
-`0xA80-0xABF` +
-`0xAC0-0xAFF` +
-`0xE00-0xE7F` +
-`0xE80-0xEBF` +
-`0xEC0-0xEFF`
-|Standard read/write +
-Standard read/write +
-Standard read/write +
-Custom read/write +
-Standard read/write +
-Standard read/write +
-Custom read/write +
-Standard read-only +
-Standard read-only +
-Custom read-only
-5+|Machine-Level CSRs
-|`00` +
-`01` +
-`01` +
-`01` +
-`01` +
-`01` +
-`10` +
-`10` +
-`10` +
-`11` +
-`11` +
-`11`
-|`11` +
-`11` +
-`11` +
-`11` +
-`11` +
-`11` +
-`11` +
-`11` +
-`11` +
-`11` +
-`11` +
-`11`
-|`XXXX` +
-`0XXX` +
-`100X` +
-`1010` +
-`1011` +
-`11XX` +
-`0XXX` +
-`10XX` +
-`11XX` +
-`0XXX` +
-`10XX` +
-`11XX`
-|`0x300-0x3FF` +
-`0x700-0x77F` +
-`0x780-0x79F` +
-`0x7A0-0x7AF` +
-`0x7B0-0x7BF` +
-`0x7C0-0x7FF` +
-`0xB00-0xB7F` +
-`0xB80-0xBBF` +
-`0xBC0-0xBFF` +
-`0xF00-0xF7F` +
-`0xF80-0xFBF` +
-`0xFC0-0xFFF`
-|Standard read/write +
-Standard read/write +
-Standard read/write +
-Standard read/write debug CSRs +
-Debug-mode-only CSRs +
-Custom read/write +
-Standard read/write +
-Standard read/write +
-Custom read/write +
-Standard read-only +
-Standard read-only +
-Custom read-only
+8+|Unprivileged and User-Level CSRs
+|`00` |`00` |`XXXX` 2+| `0x000-0x0FF` 3+|Standard read/write
+|`01` |`00` |`XXXX` 2+| `0x400-0x4FF` 3+|Standard read/write
+|`10` |`00` |`XXXX` 2+| `0x800-0x8FF` 3+|Custom read/write
+|`11` |`00` |`0XXX` 2+| `0xC00-0xC7F` 3+|Standard read-only
+|`11` |`00` |`10XX` 2+| `0xC80-0xCBF` 3+|Standard read-only
+|`11` |`00` |`11XX` 2+| `0xCC0-0xCFF` 3+|Custom read-only
+8+|Supervisor-Level CSRs
+|`00` |`01` |`XXXX` 2+| `0x100-0x1FF` 3+|Standard read/write
+|`01` |`01` |`0XXX` 2+| `0x500-0x57F` 3+|Standard read/write
+|`01` |`01` |`10XX` 2+| `0x580-0x5BF` 3+|Standard read/write
+|`01` |`01` |`11XX` 2+| `0x5C0-0x5FF` 3+|Custom read/write
+|`10` |`01` |`0XXX` 2+| `0x900-0x97F` 3+|Standard read/write
+|`10` |`01` |`10XX` 2+| `0x980-0x9BF` 3+|Standard read/write
+|`10` |`01` |`11XX` 2+| `0x9C0-0x9FF` 3+|Custom read/write
+|`11` |`01` |`0XXX` 2+| `0xD00-0xD7F` 3+|Standard read-only
+|`11` |`01` |`10XX` 2+| `0xD80-0xDBF` 3+|Standard read-only
+|`11` |`01` |`11XX` 2+| `0xDC0-0xDFF` 3+|Custom read-only
+8+|Hypervisor and VS CSRs
+|`00` |`10` |`XXXX` 2+| `0x200-0x2FF` 3+|Standard read/write
+|`01` |`10` |`0XXX` 2+| `0x600-0x67F` 3+|Standard read/write
+|`01` |`10` |`10XX` 2+| `0x680-0x6BF` 3+|Standard read/write
+|`01` |`10` |`11XX` 2+| `0x6C0-0x6FF` 3+|Custom read/write
+|`10` |`10` |`0XXX` 2+| `0xA00-0xA7F` 3+|Standard read/write
+|`10` |`10` |`10XX` 2+| `0xA80-0xABF` 3+|Standard read/write
+|`10` |`10` |`11XX` 2+| `0xAC0-0xAFF` 3+|Custom read/write
+|`11` |`10` |`0XXX` 2+| `0xE00-0xE7F` 3+|Standard read-only
+|`11` |`10` |`10XX` 2+| `0xE80-0xEBF` 3+|Standard read-only
+|`11` |`10` |`11XX` 2+| `0xEC0-0xEFF` 3+|Custom read-only
+8+|Machine-Level CSRs
+|`00` |`11` |`XXXX` 2+|`0x300-0x3FF` 3+|Standard read/write
+|`01` |`11` |`0XXX` 2+|`0x700-0x77F` 3+|Standard read/write
+|`01` |`11` |`100X` 2+|`0x780-0x79F` 3+|Standard read/write
+|`01` |`11` |`1010` 2+|`0x7A0-0x7AF` 3+|Standard read/write debug CSRs
+|`01` |`11` |`1011` 2+|`0x7B0-0x7BF` 3+|Debug-mode-only CSRs
+|`01` |`11` |`11XX` 2+|`0x7C0-0x7FF` 3+|Custom read/write
+|`10` |`11` |`0XXX` 2+|`0xB00-0xB7F` 3+|Standard read/write
+|`10` |`11` |`10XX` 2+|`0xB80-0xBBF` 3+|Standard read/write
+|`10` |`11` |`11XX` 2+|`0xBC0-0xBFF` 3+|Custom read/write
+|`11` |`11` |`0XXX` 2+|`0xF00-0xF7F` 3+|Standard read-only
+|`11` |`11` |`10XX` 2+|`0xF80-0xFBF` 3+|Standard read-only
+|`11` |`11` |`11XX` 2+|`0xFC0-0xFFF` 3+|Custom read-only
|===
<<<
@@ -438,25 +288,29 @@ Supervisor interrupt pending.
`0x603` +
`0x604` +
`0x606` +
-`0x607`
+`0x607` +
+`0x612`
|HRW +
HRW +
HRW +
HRW +
HRW +
+HRW +
HRW
|`hstatus` +
`hedeleg` +
`hideleg` +
`hie` +
`hcounteren` +
-`hgeie`
+`hgeie` +
+`hedelegh`
|Hypervisor status register. +
Hypervisor exception delegation register. +
Hypervisor interrupt delegation register. +
Hypervisor interrupt-enable register. +
Hypervisor counter enable. +
-Hypervisor guest external interrupt-enable register.
+Hypervisor guest external interrupt-enable register. +
+Upper 32 bits of `hedeleg`, RV32 only.
4+^|Hypervisor Trap Handling
@@ -590,7 +444,8 @@ Pointer to configuration data structure.
`0x304` +
`0x305` +
`0x306` +
-`0x310`
+`0x310` +
+`0x312`
|MRW +
MRW +
MRW +
@@ -598,6 +453,7 @@ MRW +
MRW +
MRW +
MRW +
+MRW +
MRW
|`mstatus` +
`misa` +
@@ -606,7 +462,8 @@ MRW
`mie` +
`mtvec` +
`mcounteren` +
-`mstatush`
+`mstatush` +
+`medelegh`
|Machine status register. +
ISA and extensions +
Machine exception delegation register. +
@@ -614,7 +471,8 @@ Machine interrupt delegation register. +
Machine interrupt-enable register. +
Machine trap-handler base address. +
Machine counter enable. +
-Additional machine status register, RV32 only.
+Additional machine status register, RV32 only. +
+Upper 32 bits of `medeleg`, RV32 only.
4+^|Machine Trap Handling
@@ -901,8 +759,8 @@ differentiate between the supported values, but must always return the
complete specified bit-encoding of any supported value when read.
====
-Implementations are permitted but not required to raise an illegal
-instruction exception if an instruction attempts to write a
+Implementations are permitted but not required to raise an
+illegal-instruction exception if an instruction attempts to write a
non-supported value to a *WLRL* field. Implementations can return arbitrary
bit patterns on the read of a *WLRL* field when the last write was of an
illegal value, but the value returned should deterministically depend on
@@ -961,7 +819,7 @@ mode.
[[csrwidthmodulation]]
=== CSR Width Modulation
-If the width of a CSR is changed (for example, by changing MXLEN or
+If the width of a CSR is changed (for example, by changing SXLEN or
UXLEN, as described in <<xlen-control>>), the
values of the _writable_ fields and bits of the new-width CSR are,
unless specified otherwise, determined from the previous-width CSR as
@@ -1003,4 +861,4 @@ Standard high-half CSRs are accessible only when
the base RISC-V instruction set is RV32 (XLEN=32).
For RV64 (when XLEN=64), the addresses of all standard high-half CSRs
are reserved, so an attempt to access a high-half CSR
-typically raises an illegal instruction exception.
+typically raises an illegal-instruction exception.
diff --git a/src/priv-preface.adoc b/src/priv-preface.adoc
index e62d59f..94ec43f 100644
--- a/src/priv-preface.adoc
+++ b/src/priv-preface.adoc
@@ -2,28 +2,34 @@
= Preface
This document describes the RISC-V privileged architecture. This
-release, version 20211203, contains the following versions of the RISC-V ISA
+release, version 20240213, contains the following versions of the RISC-V ISA
modules:
[%autowidth,float="center",align="center",cols="^,<,^",options="header",]
|===
|Module |Version |Status
|_Machine ISA_ +
+_Supervisor ISA_ +
_Smrnmi Extension_ +
-*Supervisor ISA* +
+*Svade Extension* +
*Svnapot Extension* +
*Svpbmt Extension* +
*Svinval Extension* +
+*Svadu Extension* +
*Hypervisor ISA*
|_1.13_ +
+_1.13_ +
_0.1_ +
-*1.12* +
+*1.0* +
+*1.0* +
*1.0* +
*1.0* +
*1.0* +
*1.0*
|_Draft_ +
_Draft_ +
+_Draft_ +
+*Ratified* +
*Ratified* +
*Ratified* +
*Ratified* +
@@ -31,14 +37,41 @@ _Draft_ +
*Ratified*
|===
-The following compatible changes have been made to the Machine ISA since
+The following changes have been made since version 1.12, which, while
+not strictly backwards compatible, are not anticipated to cause software
+portability problems in practice:
+
+* Redefined `misa`.MXL to be read-only, making MXLEN a constant.
+* Added the constraint that SXLEN&#8805;UXLEN.
+
+Additionally, the following compatible changes have been made to the Machine ISA since
version 1.12:
+* Transliterated the document from LaTeX into AsciiDoc.
* Defined the `misa`.V field to reflect that the V extension has been
implemented.
+* Defined the RV32-only `medelegh` and `hedelegh` CSRs.
+* Defined the misaligned atomicity granule PMA, superseding the proposed Zam
+ extension.
+* Allocated interrupt 13 for Sscofpmf LCOFI interrupt.
+* Defined hardware error and software check exception codes.
+* Specified synchronization requirements when changing the PBMTE fields
+in `menvcfg` and `henvcfg`.
+* Incorporated Svade and Svadu extension specifications.
+* Clarified that "platform- or custom-use" interrupts are actually
+"platform-use interrupts", where the platform can choose to make some custom.
* Clarified semantics of explicit accesses to CSRs wider than XLEN bits.
+* Clarified that MXLEN&#8805;SXLEN.
+* Clarified that WFI is not a HINT instruction.
+* Clarified that VS-stage page-table accesses set G-stage A/D bits.
+* Clarified ordering rules when PBMT=IO is used on main-memory regions.
+* Clarified ordering rules for hardware A/D bit updates.
+* Clarified that, for a given exception cause, `__x__tval` might sometimes
+be set to a nonzero value but sometimes not.
+* Clarified exception behavior of unimplemented or inaccessible CSRs.
+* Clarified that Svpbmt allows implementations to override additional PMAs.
-*_Preface to Version 20211203_*
+[.big]*_Preface to Version 20211203_*
This document describes the RISC-V privileged architecture. This
release, version 20211203, contains the following versions of the RISC-V
@@ -127,7 +160,7 @@ Sv48, and Sv57 PTEs.
Finally, the hypervisor architecture proposal has been extensively
revised.
-*_Preface to Version 1.11_*
+[.big]*_Preface to Version 1.11_*
This is version 1.11 of the RISC-V privileged architecture. The document
contains the following versions of the RISC-V ISA modules:
@@ -180,7 +213,7 @@ incrementing to reduce energy consumption.
* Specified contents of CSRs across XLEN modification.
* Moved PLIC chapter into its own document.
-*_Preface to Version 1.10_*
+[.big]*_Preface to Version 1.10_*
This is version 1.10 of the RISC-V privileged architecture proposal.
Changes from version 1.9.1 include:
@@ -243,7 +276,7 @@ virtual-memory management operations has been added.
* The Supervisor Binary Interface (SBI) chapter has been removed, so
that it can be maintained as a separate specification.
-*_Preface to Version 1.9.1_*
+[.big]*_Preface to Version 1.9.1_*
This is version 1.9.1 of the RISC-V privileged architecture proposal.
Changes from version 1.9 include:
diff --git a/src/resources/riscv-spec.bib b/src/resources/riscv-spec.bib
index c4d6b3b..1354344 100644
--- a/src/resources/riscv-spec.bib
+++ b/src/resources/riscv-spec.bib
@@ -25,7 +25,7 @@
@article{Katevenis:1984,
author = {Katevenis, Manolis G.H. and Sherburne,Jr., Robert W. and Patterson, David A. and S{\'e}quin, Carlo H.},
title = {The {RISC II} micro-architecture},
- journal = {Advances in VLSI and Computur Systems},
+ journal = {Advances in VLSI and Computer Systems},
issue_date = {Fall 1984},
volume = {1},
number = {2},
diff --git a/src/resources/themes/riscv-spec.yml b/src/resources/themes/riscv-spec.yml
index d514426..e8332fc 100644
--- a/src/resources/themes/riscv-spec.yml
+++ b/src/resources/themes/riscv-spec.yml
@@ -1,6 +1,8 @@
+extends: default
font:
catalog:
- merge: false
+ merge: true
+ sans-serif: GEM_FONTS_DIR/mplus1p-regular-fallback.ttf
#Petrona
body:
normal: Petrona-Light.ttf
@@ -41,7 +43,7 @@ font:
- M+ 1p Fallback
- Droid Fallback
svg:
- - M+ 1p Fallback
+ fallback-font-family: M+ 1mn
page:
background_color: ffffff
layout: portrait
@@ -74,20 +76,26 @@ codespan:
#font_family: Droid Fallback
font_family: M+ 1mn
font_style: normal
+ font-size: 11.5
menu_caret_content: " <font size=\"1.15em\"><color rgb=\"000000\">\u203a</color></font> "
heading:
align: left
margin_bottom: 0.25in
- min_height_after: 0.25in
+ margin_top: 0.5in
+ min_height_after: auto
font_color: 000000
font_family: headings
font_style: bold
h1_font_size: floor($base_font_size * 2.8)
# h2 is used for chapter titles (book doctype only)
- h2_font_size: floor($base_font_size * 2.0)
- h3_font_size: round($base_font_size * 1.7)
- h4_font_size: $base_font_size_large
- h5_font_size: $base_font_size
+ #h2_font_size: floor($base_font_size * 2.0)
+ h2-font-size: 11.5
+ #h3_font_size: round($base_font_size * 1.7)
+ h3-font-size: 11.5
+ #h4_font_size: $base_font_size_large
+ h4-font-size: 11.5
+ #h5_font_size: $base_font_size
+ h5-font-size: 11.5
h6_font_size: $base_font_size_small
title_page:
align: center
@@ -103,6 +111,7 @@ title_page:
font_family: headings
font_style: light
font_size: floor($base_font_size * 1.2)
+ margin-top: 25
authors:
font_family: headings
font_color: 3e058e
@@ -155,14 +164,17 @@ admonition:
padding: [0, $horizontal_rhythm, 0, $horizontal_rhythm]
icon:
note:
- name: pencil-square-o
+ # name: pencil-square-o
+ name: far-edit
stroke_color: 6489b3
tip:
- name: comments-o
+ #name: comments-o
+ name: far-comments
stroke_color: 646b74
size: 24
important:
- name: info
+ #name: info
+ name: fas-info-circle
stroke_color: 5f8c8b
warning:
stroke_color: 9c4d4b
@@ -208,7 +220,7 @@ example:
border_radius: $base_border_radius
border_width: 0.2
background_color: ffffff
- # FIXME reenable padding bottom once margin collapsing is implemented
+ # FIXME re-enable padding bottom once margin collapsing is implemented
padding: [$vertical_rhythm, $horizontal_rhythm, 0, $horizontal_rhythm]
image:
align: left
@@ -238,6 +250,7 @@ figure:
align: center
table:
background_color: $page_background_color
+ font-size: 9
#head_background_color: #2596be
#head_font_color: $base_font_color
head_font_style: bold
@@ -248,51 +261,41 @@ table:
border_width: $base_border_width
cell_padding: 3
caption:
- end: bottom
+ end: top
align: center
text-align: center
max-width: none
toc:
indent: $horizontal_rhythm
line_height: 1.4
- dot_leader:
- #content: ". "
+ dot-leader:
+ content: " "
font_color: a9a9a9
- #levels: 2 3
+ font-style: bold
+ #levels: 2
+ h2-font-style: bold
# NOTE in addition to footer, header is also supported
header:
font_size: $base_font_size_small
- # NOTE if background_color is set, background and border will span width of page
border_color: dddddd
border_width: 0.35
height: $base_line_height_length * 2.6
line_height: 1
- padding: [$base_line_height_length / 1.3, 1, 0, 1]
+ padding: [$base_line_height_length / 1, 1, .5, 1]
vertical_align: margin_inside
- #image_vertical_align: <alignment> or <number>
- # additional attributes for content:
- # * {page-count}
- # * {page-number}
- # * {document-title}
- # * {document-subtitle}
- # * {chapter-title}
- # * {section-title}
- # * {section-or-chapter-title}
recto:
right:
content: '{section-or-chapter-title} | Page {page-number}'
verso:
left:
content: '{section-or-chapter-title} | Page {page-number}'
- # left: 'Page {page-number} | {section-or-chapter-title}'
footer:
font_size: $base_font_size_small
- # NOTE if background_color is set, background and border will span width of page
border_color: dddddd
- border_width: 0.25
- height: $base_line_height_length * 2.5
+ border_width: 0.35
+ height: $base_line_height_length * 2.6
line_height: 1
- padding: [$base_line_height_length / 2, 1, 0, 1]
+ padding: [$base_line_height_length / 1, 1, .5, 1]
vertical_align: top
#image_vertical_align: <alignment> or <number>
# additional attributes for content:
diff --git a/src/riscv-privileged.adoc b/src/riscv-privileged.adoc
index 516fc3c..7ca9ad1 100644
--- a/src/riscv-privileged.adoc
+++ b/src/riscv-privileged.adoc
@@ -2,9 +2,8 @@
= The RISC-V Instruction Set Manual: Volume II: Privileged Architecture
:description: Volume II - Privileged Architecture
:company: RISC-V.org
-:author: Andrew waterman, waterman@eecs.berkeley.edu; Krste Asanović, krste@berkeley.edu; John Hauser, jh.riscv@jhauser.us, SiFive Inc., CS Division, EECS Department, University of California, Berkeley
-:revdate: 04/2023
-//:revnumber: ASCIIDOC Conversion
+:revdate: Revised 20240213
+:revnumber: 20240213
//:revremark: Pre-release version
//development: assume everything can change
//stable: assume everything could change
@@ -21,7 +20,7 @@
:title-logo-image: image:risc-v_logo.png[pdfwidth=3.25in,align=center]
:page-background-image: image:draft.png[opacity=20%]
//:title-page-background-image: none
-:back-cover-image: image:backpage.png[opacity=25%]
+//:back-cover-image: image:backpage.png[opacity=25%]
// Settings:
:experimental:
:reproducible:
@@ -52,6 +51,11 @@ endif::[]
:hide-uri-scheme:
:stem: latexmath
:footnote:
+:le: &#8804;
+:ge: &#8805;
+:ne: &#8800;
+:approx: &#8776;
+:inf: &#8734;
_Contributors to all versions of the spec in alphabetical order (please contact
editors to suggest corrections): Krste Asanović, Peter Ashenden, Rimas
@@ -73,9 +77,6 @@ privileged specification version 1.9.1 released under following license: ©2010-
Avižienis,
David Patterson, Krste Asanović. Creative Commons Attribution 4.0 International License._
-_Please cite as: “The RISC-V Instruction Set Manual, Volume II: Privileged Architecture, Document Version 20211203”, Editors Andrew Waterman, Krste Asanović, and John Hauser, RISC-V
-International, December 2021._
-
//the colophon allows for a section after the preamble that is part of the frontmatter and therefore not assigned a page number.
//include::colophon.adoc[]
//preface.tex
@@ -86,12 +87,16 @@ include::priv-intro.adoc[]
include::priv-csrs.adoc[]
//machine.tex
include::machine.adoc[]
+include::smstateen.adoc[]
+include::smepmp.adoc[]
//rnmi.tex
include::rnmi.adoc[]
//supervisor.tex
include::supervisor.adoc[]
+include::sscofpmt.adoc[]
//hypervisor.tex
include::hypervisor.adoc[]
+include::sstc.adoc[]
//priv-insns.tex
include::priv-insns.adoc[]
//priv-history.tex
diff --git a/src/riscv-unprivileged.adoc b/src/riscv-unprivileged.adoc
index b89a44d..7a3ab3a 100644
--- a/src/riscv-unprivileged.adoc
+++ b/src/riscv-unprivileged.adoc
@@ -1,18 +1,12 @@
[[risc-v-isa]]
-= The RISC-V Instruction Set Manual: Volume I: Unprivileged Architecture
+= The RISC-V Instruction Set Manual Volume I: Unprivileged Architecture
:description: Unprivileged Architecture
:company: RISC-V.org
-:authors: Editors: Andrew waterman, Krste Asanovic, SiFive, Inc., CS Division, EECS Department, University of California, Berkeley
-:revdate: 12/2022
-:revnumber: ASCIIDOC Conversion
-:revremark: Pre-release version
-//development: assume everything can change
-//stable: assume everything could change
-//frozen: of you implement this version you assume the risk that something might change because of the public review cycle but we expect little to no change.
-//ratified: you can implement this and be assured nothing will change. if something needs to change due to an errata or enhancement, it will come out in a new extension. we do not revise extensions.
+:revdate: Revised 20230723
+:revnumber: 20191214
+//:revremark: Pre-release version
:url-riscv: http://riscv.org
:doctype: book
-//:doctype: report
:colophon:
:pdf-theme: ../src/resources/themes/riscv-spec.yml
:pdf-fontsdir: ../src/resources/fonts/
@@ -22,7 +16,8 @@
:title-logo-image: image:risc-v_logo.png[pdfwidth=3.25in,align=center]
:page-background-image: image:draft.png[opacity=20%]
//:title-page-background-image: none
-:back-cover-image: image:backpage.png[opacity=25%]
+//:back-cover-image: image:backpage.png[opacity=25%]
+:back-cover-image: image:riscv-horizontal-color.svg[opacity=25%]
// Settings:
:experimental:
:reproducible:
@@ -52,19 +47,21 @@ endif::[]
:hide-uri-scheme:
:stem: latexmath
:footnote:
+:le: &#8804;
+:ge: &#8805;
+:ne: &#8800;
+:approx: &#8776;
+:inf: &#8734;
+:csrname: envcfg
_Contributors to all versions of the spec in alphabetical order (please contact editors to suggest
corrections): Arvind, Krste Asanović, Rimas Avižienis, Jacob Bachmeyer, Christopher F. Batten,
-Allen J. Baum, Alex Bradbury, Scott Beamer, Preston Briggs, Christopher Celio, Chuanhua
-Chang, David Chisnall, Paul Clayton, Palmer Dabbelt, Ken Dockser, Roger Espasa, Greg Favor,
-Shaked Flur, Stefan Freudenberger, Marc Gauthier, Andy Glew, Jan Gray, Michael Hamburg, John
-Hauser, David Horner, Bruce Hoult, Bill Huffman, Alexandre Joannou, Olof Johansson, Ben Keller,
-David Kruckemyer, Yunsup Lee, Paul Loewenstein, Daniel Lustig, Yatin Manerkar, Luc Maranget,
-Margaret Martonosi, Joseph Myers, Vijayanand Nagarajan, Rishiyur Nikhil, Jonas Oberhauser,
-Stefan O'Rear, Albert Ou, John Ousterhout, David Patterson, Christopher Pulte, Jose Renau,
-Josh Scheid, Colin Schmidt, Peter Sewell, Susmit Sarkar, Michael Taylor, Wesley Terpstra, Matt
-Thomas, Tommy Thorn, Caroline Trippel, Ray VanDeWalker, Muralidaran Vijayaraghavan, Megan
-Wachs, Andrew Waterman, Robert Watson, Derek Williams, Andrew Wright, Reinoud Zandijk,
+Allen J. Baum, Abel Bernabeu, Alex Bradbury, Scott Beamer, Preston Briggs, Christopher Celio, Chuanhua
+Chang, David Chisnall, Paul Clayton, Palmer Dabbelt, Ken Dockser, Paul Donahue, Aaron Durbin, Roger Espasa, Greg Favor, Shaked Flur, Stefan Freudenberger, Marc Gauthier, Andy Glew, Jan Gray, Michael Hamburg, John
+Hauser, John Ingalls, David Horner, Bruce Hoult, Bill Huffman, Alexandre Joannou, Olof Johansson, Ben Keller,
+David Kruckemyer, Tariq Kurd, Yunsup Lee, Paul Loewenstein, Daniel Lustig, Yatin Manerkar, Luc Maranget,
+Margaret Martonosi, Phil McCoy, Christoph Müllner, Joseph Myers, Vijayanand Nagarajan, Rishiyur Nikhil, Jonas Oberhauser, Stefan O'Rear, Albert Ou, John Ousterhout, David Patterson, Christopher Pulte, Jose Renau,
+Josh Scheid, Colin Schmidt, Peter Sewell, Susmit Sarkar, Ved Shanbhogue, Michael Taylor, Wesley Terpstra, Matt Thomas, Tommy Thorn, Philipp Tomsich, Caroline Trippel, Ray VanDeWalker, Muralidaran Vijayaraghavan, Megan Wachs, Paul Wamsley, Andrew Waterman, Robert Watson, David Weaver, Derek Williams, Andrew Wright, Reinoud Zandijk,
and Sizhuo Zhang._
_This document is released under a Creative Commons Attribution 4.0 International License._
@@ -115,6 +112,7 @@ include::rvwmo.adoc[]
//rvwmo.tex
include::c-st-ext.adoc[]
//c.tex
+include::zimop.adoc[]
include::b-st-ext.adoc[]
//b.tex
include::j-st-ext.adoc[]
@@ -123,14 +121,17 @@ include::p-st-ext.adoc[]
//p.tex
include::v-st-ext.adoc[]
//v.tex
-include::zam-st-ext.adoc[]
-//zam.tex
include::zfinx.adoc[]
//zfinx.tex
include::zfa.adoc[]
//zfa.tex
include::ztso-st-ext.adoc[]
//ztso.tex
+include::cmo.adoc[]
+include::zawrs.adoc[]
+
+include::zc.adoc[]
+
include::rv-32-64g.adoc[]
//gmaps.tex
include::extending.adoc[]
@@ -143,7 +144,12 @@ include::mm-eplan.adoc[]
//memory.tex
include::mm-formal.adoc[]
//end of memory.tex, memory-model-alloy.tex, memory-model-herd.tex
+//Appendices for Vector
+include::vector-examples.adoc[]
+include::calling-convention.adoc[]
+//include::fraclmul.adoc[]
+//End of Vector appendices
include::index.adoc[]
// this is generated from index markers.
include::bibliography.adoc[]
-// this references the riscv-spec.bi file that has been copied into the resources directoy \ No newline at end of file
+// this references the riscv-spec.bi file that has been copied into the resources directory
diff --git a/src/rnmi.adoc b/src/rnmi.adoc
index 63dd473..f505f56 100644
--- a/src/rnmi.adoc
+++ b/src/rnmi.adoc
@@ -1,9 +1,9 @@
[[rnmi]]
-== "Smrnmi" Standard Extension for Resumable Non-Maskable Interrupts, Version 0.4
+== "Smrnmi" Standard Extension for Resumable Non-Maskable Interrupts, Version 0.5
[WARNING]
====
-*Warning! This draft specification may change before being accepted as
+*Warning! This frozen specification may change before being accepted as
standard by RISC-V International.*
====
@@ -71,20 +71,30 @@ of holding.
.Resumable NMI cause `mncause`.
include::images/bytefield/mncause.edn[]
-The `mncause` CSR holds the reason for the NMI, with bit MXLEN-1 set to
-1, and the NMI cause encoded in the least-significant bits or zero if
-NMI causes are not supported.
+The `mncause` CSR holds the reason for the NMI.
+If the reason is an interrupt, bit MXLEN-1 is set to 1, and the NMI
+cause is encoded in the least-significant bits.
+If the reason is an interrupt and NMI causes are not supported, bit MXLEN-1 is
+set to 1, and zero is written to the least-significant bits.
+If the reason is an exception within M-mode that results in a double trap as
+specified in the Smdbltrp extension, bit MXLEN-1 is set to 0 and the
+least-significant bits are set to the cause code corresponding to the
+exception that precipitated the double trap.
.Resumable NMI status register `mnstatus`.
include::images/bytefield/mnstatus.edn[]
The `mnstatus` CSR holds a two-bit field, MNPP, which on entry to the
-trap handler holds the privilege mode of the interrupted context,
+RNMI trap handler holds the privilege mode of the interrupted context,
encoded in the same manner as `mstatus`.MPP. It also holds a one-bit
-field, MNPV, which on entry to the trap handler holds the virtualization
+field, MNPV, which on entry to the RNMI trap handler holds the virtualization
mode of the interrupted context, encoded in the same manner as
`mstatus`.MPV.
+If the Zicfilp extension is implemented, `mnstatus` also holds the MNPELP
+field, which on entry to the RNMI trap handler holds the previous `ELP` state.
+When an RNMI trap is taken, MNPELP is set to `ELP` and `ELP` is set to 0.
+
`mnstatus` also holds the NMIE bit. When NMIE=1, nonmaskable interrupts
are enabled. When NMIE=0, _all_ interrupts are disabled.
@@ -131,6 +141,8 @@ MNRET is an M-mode-only instruction that uses the values in `mnepc` and
`mnstatus` to return to the program counter, privilege mode, and
virtualization mode of the interrupted context. This instruction also
sets `mnstatus`.NMIE. If MNRET changes the privilege mode to a mode less privileged than M, it also sets `mstatus`.MPRV to 0.
+If the Zicfilp extension is implemented, then if `mnstatus`.MNPP holds the
+value __y__, MNRET sets `ELP` to the logical AND of __y__LPE and `mnstatus`.MNPELP.
=== RNMI Operation
@@ -154,4 +166,4 @@ The Smrnmi extension does not change the behavior of the MRET and SRET
instructions. In particular, MRET and SRET are unaffected by the
`mnstatus`.NMIE bit, and their execution does not alter the
`mnstatus`.NMIE bit.
-==== \ No newline at end of file
+====
diff --git a/src/rv-32-64g.adoc b/src/rv-32-64g.adoc
index cda8b19..1818ddf 100644
--- a/src/rv-32-64g.adoc
+++ b/src/rv-32-64g.adoc
@@ -18,7 +18,7 @@ and RV64G.
|inst[6:5]
|00 |LOAD |LOAD-FP |_custom-0_ |MISC-MEM |OP-IMM |AUIPC |OP-IMM-32 |48b
|01 |STORE |STORE-FP |_custom-1_ |AMO |OP |LUI |OP-32 |64b
-|10 |MADD |MSUB |NMSUB |NMADD |OP-FP |_reserved_ |_custom-2/rv128_|48b
+|10 |MADD |MSUB |NMSUB |NMADD |OP-FP |OP-V |_custom-2/rv128_|48b
|11 |BRANCH |JALR |_reserved_ |JAL |SYSTEM |_reserved_ |_custom-3/rv128_|&#8805;80b
|===
@@ -442,6 +442,15 @@ ISA.
2+|1101010 |00011 |rs1 |rm |rd |1010011 |FCVT.H.LU
|===
+[%autowidth.stretch,float="center",align="center",cols="^2m,^2m,^2m,^2m,<2m,>3m, <4m, >4m, <4m, >4m, <4m, >4m, <4m, >4m, <6m"]
+|===
+15+^|Zawrs Standard Extension
+
+6+^|000000001101 2+^|00000 2+^|000 2+^|00000 2+^|1110011 <|WRS.NTO
+6+^|000000011101 2+^|00000 2+^|000 2+^|00000 2+^|1110011 <|WRS.STO
+|===
+
+
<<rvgcsrnames>> lists the CSRs that have currently been
allocated CSR addresses. The timers, counters, and floating-point CSRs
are the only CSRs defined in this specification.
@@ -462,4 +471,4 @@ are the only CSRs defined in this specification.
|0xC80|Read-only|cycleh|Upper 32 bits of `cycle`, RV32I only.
|0xC81|Read-only|timeh|Upper 32 bits of `time`, RV32I only.
|0xC82|Read-only|instreth|Upper 32 bits of `instret`, RV32I only.
-|=== \ No newline at end of file
+|===
diff --git a/src/rv32.adoc b/src/rv32.adoc
index 6e57344..bd38ac8 100644
--- a/src/rv32.adoc
+++ b/src/rv32.adoc
@@ -50,7 +50,7 @@ holds the address of the current instruction.
[[gprs]]
.RISC-V base unprivileged integer register state.
-[col[s="<|^|>"|option[s="header",width="50%",align="center"grid="rows"]
+[cols="<,^,>",options="header",width="50%",align="center",grid="rows"]
|===
<| [.small]#XLEN-1#| >| [.small]#0#
3+^| [.small]#x0/zero#
@@ -885,7 +885,7 @@ hardware.
include::images/wavedrom/env_call-breakpoint.adoc[]
[[env-call]]
-//.Evironment call and breakpoint instructions
+//.Environment call and breakpoint instructions
These two instructions cause a precise requested trap to the supporting
execution environment.
@@ -1005,7 +1005,7 @@ hints, security tags, and instrumentation flags for simulation/emulation.
|AUIPC |_rd_=`x0` |latexmath:[$2^{20}$]
-|ADDI |_rd_=`x0`, and either _rs1_ &#8800; `x0` or _imm_&#8800;0 |latexmath:[$2^{17}-1$]
+|ADDI |_rd_=`x0`, and either _rs1_&#8800;``x0`` or _imm_&#8800;0 |latexmath:[$2^{17}-1$]
|ANDI |_rd_=`x0` |latexmath:[$2^{17}$]
@@ -1013,14 +1013,14 @@ hints, security tags, and instrumentation flags for simulation/emulation.
|XORI |_rd_=`x0` |latexmath:[$2^{17}$]
-|ADD |_rd_=`x0`, _rs1_=`x0` |latexmath:[$2^{10}-32$]
+|ADD |_rd_=`x0`, _rs1_&#8800;``x0`` |latexmath:[$2^{10}-32$]
-|ADD |_rd_=`x0`, _rs1_=`x0`, _rs2_=`x2-x5` | 28
+|ADD |_rd_=`x0`, _rs1_=`x0`, _rs2_&#8800;``x2-x5`` | 28
|ADD |_rd_=`x0`, _rs1_=`x0`, _rs2_=`x2-x5` |4|(_rs2_=`x2`) NTL.P1 +
(_rs2_=`x3`) NTL.PALL +
-(_rs3_=`x4`) NTL.S1 +
-(_rs2_=`x5`)
+(_rs2_=`x4`) NTL.S1 +
+(_rs2_=`x5`) NTL.ALL
|SUB |_rd_=`x0` |latexmath:[$2^{10}$] .11+<.^m|_Designated for future standard use_
@@ -1036,13 +1036,13 @@ hints, security tags, and instrumentation flags for simulation/emulation.
|SRA |_rd_=`x0` |latexmath:[$2^{10}$]
-|FENCE|_rd_=`x0`, _rs1_ &#8800; `x0`, _fm_=0, and either _pred_=0 or _succ_=0| latexmath:[$2^{10}-63$]
+|FENCE|_rd_=`x0`, _rs1_&#8800;``x0``, _fm_=0, and either _pred_=0 or _succ_=0| latexmath:[$2^{10}-63$]
-|FENCE|_rd_ &#8800; `x0`, _rs1_=`x0`, _fm_=0, and either _pred_=0 or _succ_=0| latexmath:[$2^{10}-63$]
+|FENCE|_rd_&#8800;``x0``, _rs1_=`x0`, _fm_=0, and either _pred_=0 or _succ_=0| latexmath:[$2^{10}-63$]
|FENCE |_rd_=_rs1_=`x0`, _fm_=0, _pred_=0, _succ_&#8800;0 |15
-|FENCE |_rd_=_rs1_=`x0`, _fm_=0, _pred_&#8800;W, _succ_&#8800;0 |15
+|FENCE |_rd_=_rs1_=`x0`, _fm_=0, _pred_&#8800;W, _succ_=0 |15
|FENCE |_rd_=_rs1_=`x0`, _fm_=0, _pred_=W, _succ_=0 |1 |PAUSE
diff --git a/src/rv32e.adoc b/src/rv32e.adoc
index 7f19d49..c30b598 100644
--- a/src/rv32e.adoc
+++ b/src/rv32e.adoc
@@ -43,4 +43,4 @@ The previous draft of this chapter made all encodings using the
conservative approach, making these reserved so that they can be
allocated between custom space or new standard encodings at a later
date.
-==== \ No newline at end of file
+====
diff --git a/src/rv64.adoc b/src/rv64.adoc
index 4a28ac9..b675dc1 100644
--- a/src/rv64.adoc
+++ b/src/rv64.adoc
@@ -78,7 +78,7 @@ _imm[5] &#8800; 0_ are reserved.
[NOTE]
====
Previously, SLLIW, SRLIW, and SRAIW with _imm[5] &#8800; 0_
-were defined to cause illegal instruction exceptions, whereas now they
+were defined to cause illegal-instruction exceptions, whereas now they
are marked as reserved. This is a backwards-compatible change.
====
diff --git a/src/rvwmo.adoc b/src/rvwmo.adoc
index 98e1b2e..fd0bd21 100644
--- a/src/rvwmo.adoc
+++ b/src/rvwmo.adoc
@@ -7,7 +7,7 @@ returned by loads of memory. RISC-V uses a memory model called "RVWMO"
(RISC-V Weak Memory Ordering) which is designed to provide flexibility
for architects to build high-performance scalable designs while
simultaneously supporting a tractable programming model.
-(((design, high performace)))
+(((design, high performance)))
(((design, scalable)))
Under RVWMO, code running on a single hart appears to execute in order
@@ -20,10 +20,8 @@ base RISC-V ISA provides a FENCE instruction for this purpose, described
in <<fence>>, while the atomics extension "A" additionally defines load-reserved/store-conditional and atomic read-modify-write instructions.
(((atomics, misaligned)))
-The standard ISA extension for misaligned atomics "Zam"
-(<<zam>>) and the standard ISA extension for total
-store ordering "Ztso" (<<ztso>>) augment RVWMO
-with additional rules specific to those extensions.
+The standard ISA extension for total store ordering "Ztso" (<<ztso>>) augments
+RVWMO with additional rules specific to that extension.
The appendices to this specification provide both axiomatic and
operational formalizations of the memory consistency model as well as
@@ -102,12 +100,13 @@ a set of component memory operations of any granularity. The memory
operations generated by such instructions are not ordered with respect
to each other in program order, but they are ordered normally with
respect to the memory operations generated by preceding and subsequent
-instructions in program order. The atomics extension "A" does not
-require execution environments to support misaligned atomic instructions
-at all; however, if misaligned atomics are supported via the "Zam"
-extension, LRs, SCs, and AMOs may be decomposed subject to the
-constraints of the atomicity axiom for misaligned atomics, which is
-defined in <<zam>>.
+instructions in program order.
+The atomics extension "A" does not require execution environments to support
+misaligned atomic instructions at all.
+However, if misaligned atomics are supported via the misaligned atomicity
+granule PMA, then AMOs within an atomicity granule are not decomposed, nor are
+loads and stores defined in the base ISAs, nor are loads and stores of no more
+than XLEN bits defined in the F, D, and Q extensions.
(((decomposition)))
[NOTE]
@@ -558,7 +557,7 @@ register(s) to destination register(s) as specified
.RV32M Standard Extension
[%autowidth.stretch,float="center",align="center",cols="<,<,<,<,<",options="header"]
|===
-| |Source Regisers |Destination Registers |Accumulating CSRs|
+| |Source Registers |Destination Registers |Accumulating CSRs|
|MUL | _rs1_, _rs2_ |_rd_ ||
diff --git a/src/smepmp.adoc b/src/smepmp.adoc
new file mode 100644
index 0000000..547f723
--- /dev/null
+++ b/src/smepmp.adoc
@@ -0,0 +1,171 @@
+[[smepmp]]
+== PMP Enhancements for memory access and execution prevention on Machine mode (Smepmp)
+=== Introduction
+
+Being able to access the memory of a process running at a high privileged execution mode, such as the Supervisor or Machine mode, from a lower privileged mode such as the User mode, introduces an obvious attack vector since it allows for an attacker to perform privilege escalation, and tamper with the code and/or data of that process. A less obvious attack vector exists when the reverse happens, in which case an attacker instead of tampering with code and/or data that belong to a high-privileged process, can tamper with the memory of an unprivileged / less-privileged process and trick the high-privileged process to use or execute it.
+
+To prevent this attack vector, two mechanisms known as Supervisor Memory Access Prevention (SMAP) and Supervisor Memory Execution Prevention (SMEP) were introduced in recent systems. The first one prevents the OS from accessing the memory of an unprivileged process unless a specific code path is followed, and the second one prevents the OS from executing the memory of an unprivileged process at all times. RISC-V already includes support for SMAP, through the ``sstatus.SUM`` bit, and for SMEP by always denying execution of virtual memory pages marked with the U bit, with Supervisor mode (OS) privileges, as mandated on the Privilege Spec.
+
+
+[NOTE]
+====
+Terms:
+
+* *PMP Entry*: A pair of ``pmpcfg[i]`` / ``pmpaddr[i]`` registers.
+* *PMP Rule*: The contents of a pmpcfg register and its associated pmpaddr register(s), that encode a valid protected physical memory region, where ``pmpcfg[i].A != OFF``, and if ``pmpcfg[i].A == TOR``, ``pmpaddr[i-1] < pmpaddr[i]``.
+* *Ignored*: Any permissions set by a matching PMP rule are ignored, and _all_ accesses to the requested address range are allowed.
+* *Enforced*: Only access types configured in the PMP rule matching the requested address range are allowed; failures will cause an access-fault exception.
+* *Denied*: Any permissions set by a matching PMP rule are ignored, and _no_ accesses to the requested address range are allowed; failures will cause an access-fault exception.
+* *Locked*: A PMP rule/entry where the ``pmpcfg.L`` bit is set.
+* *PMP reset*: A reset process where all PMP settings of the hart, including locked rules/settings, are re-initialized to a set of safe defaults, before releasing the hart (back) to the firmware / OS / application.
+====
+
+==== Threat model
+
+However, there are no such mechanisms available on Machine mode in the current (v1.11) Privileged Spec. It is not possible for a PMP rule to be *enforced* only on non-Machine modes and *denied* on Machine mode, to only allow access to a memory region by less-privileged modes. It is only possible to have a *locked* rule that will be *enforced* on all modes, or a rule that will be *enforced* on non-Machine modes and be *ignored* by Machine mode. So for any physical memory region which is not protected with a Locked rule, Machine mode has unlimited access, including the ability to execute it.
+
+Without being able to protect less-privileged modes from Machine mode, it is not possible to prevent the mentioned attack vector. This becomes even more important for RISC-V than on other architectures, since implementations are allowed where a hart only has Machine and User modes available, so the whole OS will run on Machine mode instead of the non-existent Supervisor mode. In such implementations the attack surface is greatly increased, and the same kind of attacks performed on Supervisor mode and mitigated through SMAP/SMEP, can be performed on Machine mode without any available mitigations. Even on implementations with Supervisor mode present attacks are still possible against the Firmware and/or the Secure Monitor running on Machine mode.
+
+[[proposal]]
+=== Proposal
+
+. *Machine Security Configuration (mseccfg)* is a new RW Machine mode CSR, used for configuring various security mechanisms present on the hart, and only accessible to Machine mode. It is 64 bits wide, and is at address *0x747 on RV64* and *0x747 (low 32bits), 0x757 (high 32bits) on RV32*. All mseccfg fields defined on this proposal are WARL, and the remaining bits are reserved for future standard use and should always read zero. The reset value of mseccfg is implementation-specific, otherwise if backwards compatibility is a requirement it should reset to zero on hard reset.
+
+. On ``mseccfg`` we introduce a field on bit 2 called *Rule Locking Bypass (mseccfg.RLB)* with the following functionality:
++
+.. When ``mseccfg.RLB`` is 1 *locked* PMP rules may be removed/modified and *locked* PMP entries may be edited.
+
+.. When ``mseccfg.RLB`` is 0 and ``pmpcfg.L`` is 1 in any rule or entry (including disabled entries), then ``mseccfg.RLB`` remains 0 and any further modifications to ``mseccfg.RLB`` are ignored until a *PMP reset*.
++
+[CAUTION]
+====
+Note that this feature is intended to be used as a debug mechanism, or as a temporary workaround during the boot process for simplifying software, and optimizing the allocation of memory and PMP rules. Using this functionality under normal operation, after the boot process is completed, should be avoided since it weakens the protection of _M-mode-only_ rules. Vendors who don’t need this functionality may hardwire this field to 0.
+====
+
+. On ``mseccfg`` we introduce a field in bit 1 called *Machine Mode Whitelist Policy (mseccfg.MMWP)*. This is a sticky bit, meaning that once set it cannot be unset until a *PMP reset*. When set it changes the default PMP policy for M-mode when accessing memory regions that don’t have a matching PMP rule, to *denied* instead of *ignored*.
+
+. On ``mseccfg`` we introduce a field in bit 0 called *Machine Mode Lockdown (mseccfg.MML)*. This is a sticky bit, meaning that once set it cannot be unset until a *PMP reset*. When ``mseccfg.MML`` is set the system's behavior changes in the following way:
+
+.. The meaning of ``pmpcfg.L`` changes: Instead of marking a rule as *locked* and *enforced* in all modes, it now marks a rule as *M-mode-only* when set and *S/U-mode-only* when unset. The formerly reserved encoding of ``pmpcfg.RW=01``, and the encoding ``pmpcfg.LRWX=1111``, now encode a *Shared-Region*.
++
+An _M-mode-only_ rule is *enforced* on Machine mode and *denied* in Supervisor or User mode. It also remains *locked* so that any further modifications to its associated configuration or address registers are ignored until a *PMP reset*, unless ``mseccfg.RLB`` is set.
++
+An _S/U-mode-only_ rule is *enforced* on Supervisor and User modes and *denied* on Machine mode.
++
+A _Shared-Region_ rule is *enforced* on all modes, with restrictions depending on the ``pmpcfg.L`` and ``pmpcfg.X`` bits:
++
+* A _Shared-Region_ rule where ``pmpcfg.L`` is not set can be used for sharing data between M-mode and S/U-mode, so is not executable. M-mode has read/write access to that region, and S/U-mode has read access if ``pmpcfg.X`` is not set, or read/write access if ``pmpcfg.X`` is set.
++
+* A _Shared-Region_ rule where ``pmpcfg.L`` is set can be used for sharing code between M-mode and S/U-mode, so is not writeable. Both M-mode and S/U-mode have execute access on the region, and M-mode also has read access if ``pmpcfg.X`` is set. The rule remains *locked* so that any further modifications to its associated configuration or address registers are ignored until a *PMP reset*, unless ``mseccfg.RLB`` is set.
++
+* The encoding ``pmpcfg.LRWX=1111`` can be used for sharing data between M-mode and S/U mode, where both modes only have read-only access to the region. The rule remains *locked* so that any further modifications to its associated configuration or address registers are ignored until a *PMP reset*, unless ``mseccfg.RLB`` is set.
+
+
+.. Adding a rule with executable privileges that either is *M-mode-only* or a *locked* *Shared-Region* is not possible and such ``pmpcfg`` writes are ignored, leaving ``pmpcfg`` unchanged. This restriction can be temporarily lifted by setting ``mseccfg.RLB`` e.g. during the boot process.
+
+.. Executing code with Machine mode privileges is only possible from memory regions with a matching *M-mode-only* rule or a *locked* *Shared-Region* rule with executable privileges. Executing code from a region without a matching rule or with a matching _S/U-mode-only_ rule is *denied*.
+
+.. If ``mseccfg.MML`` is not set, the combination of ``pmpcfg.RW=01`` remains reserved for future standard use.
+
+
+==== Truth table when mseccfg.MML is set
+
+[cols="^1,^1,^1,^1,^3,^3",stripes=even,options="header"]
+|===
+4+|Bits on _pmpcfg_ register {set:cellbgcolor:green} 2+|Result
+|L|R|W|X|M Mode|S/U Mode
+|{set:cellbgcolor:!} 0|0|0|0 2+|Inaccessible region (Access Exception)
+|0|0|0|1|Access Exception|Execute-only region
+|0|0|1|0 2+|Shared data region: Read/write on M mode, read-only on S/U mode
+|0|0|1|1 2+|Shared data region: Read/write for both M and S/U mode
+|0|1|0|0|Access Exception|Read-only region
+|0|1|0|1|Access Exception|Read/Execute region
+|0|1|1|0|Access Exception|Read/Write region
+|0|1|1|1|Access Exception|Read/Write/Execute region
+|1|0|0|0 2+|Locked inaccessible region* (Access Exception)
+|1|0|0|1|Locked Execute-only region*|Access Exception
+|1|0|1|0 2+|Locked Shared code region: Execute only on both M and S/U mode.*
+|1|0|1|1 2+|Locked Shared code region: Execute only on S/U mode, read/execute on M mode.*
+|1|1|0|0|Locked Read-only region*|Access Exception
+|1|1|0|1|Locked Read/Execute region*|Access Exception
+|1|1|1|0|Locked Read/Write region*|Access Exception
+|1|1|1|1 2+|Locked Shared data region: Read only on both M and S/U mode.*
+|===
+
+*: *Locked* rules cannot be removed or modified until a *PMP reset*, unless ``mseccfg.RLB`` is set.
+
+==== Visual representation of the proposal
+
+image::smepmp-visual-representation.png[]
+
+=== Smepmp software discovery
+
+Since all fields defined on ``mseccfg`` as part of this proposal are locked when set (``MMWP``/``MML``) or locked when cleared (``RLB``), software can't poll them for determining the presence of Smepmp. It is expected that BootROM will set ``mseccfg.MMWP`` and/or ``mseccfg.MML`` during early boot, before jumping to the firmware, so that the firmware will be able to determine the presence of Smepmp by reading ``mseccfg`` and checking the state of ``mseccfg.MMWP`` and ``mseccfg.MML``.
+
+[[rationale]]
+=== Rationale
+
+. Since a CSR for security and / or global PMP behavior settings is not available with the current spec, we needed to define a new one. This new CSR will allow us to add further security configuration options in the future and also allow developers to verify the existence of the new mechanisms defined on this proposal.
+
+. There are use cases where developers want to enforce PMP rules in M-mode during the boot process, that they are also able to modify, merge, and / or remove later on. Since a rule that is enforced in M-mode also needs to be locked (or else badly written or malicious M-mode software can remove it at any time), the only way for developers to approach this is to keep adding PMP rules to the chain and rely on rule priority. This is a waste of PMP rules and since it’s only needed during boot, ``mseccfg.RLB`` is a simple workaround that can be used temporarily and then disabled and locked down.
++
+Also when ``mseccfg.MML`` is set, according to 4b it’s not possible to add a _Shared-Region_ rule with executable privileges. So RLB can be set temporarily during the boot process to register such regions. Note that it’s still possible to register executable _Shared-Region_ rules using initial register settings (that may include ``mseccfg.MML`` being set and the rule being set on PMP registers) on *PMP reset*, without using RLB.
++
+[WARNING]
+====
+*Be aware that RLB introduces a security vulnerability if left set after the boot process is over and in general it should be used with caution, even when used temporarily.* Having editable PMP rules in M-mode gives a false sense of security since it only takes a few malicious instructions to lift any PMP restrictions this way. It doesn’t make sense to have a security control in place and leave it unprotected. Rule Locking Bypass is only meant as a way to optimize the allocation of PMP rules, catch errors during debugging, and allow the bootrom/firmware to register executable _Shared-Region_ rules. If developers / vendors have no use for such functionality, they should never set ``mseccfg.RLB`` and if possible hard-wire it to 0. In any case *RLB should be disabled and locked as soon as possible*.
+====
++
+[NOTE]
+====
+If ``mseccfg.RLB`` is not used and left unset, it will be locked as soon as a PMP rule/entry with the ``pmpcfg.L`` bit set is configured.
+====
++
+[IMPORTANT]
+====
+Since PMP rules with a higher priority override rules with a lower priority, locked rules must precede non-locked rules.
+====
+
+. With the current spec M-mode can access any memory region unless restricted by a PMP rule with the ``pmpcfg.L`` bit set. There are cases where this approach is overly permissive, and although it’s possible to restrict M-mode by adding PMP rules during the boot process, this can also be seen as a waste of PMP rules. Having the option to block anything by default, and use PMP as a whitelist for M-mode is considered a safer approach. This functionality may be used during the boot process or upon *PMP reset*, using initial register settings. +
+
+. The current dual meaning of the ``pmpcfg.L`` bit that marks a rule as Locked and *enforced* on all modes is neither flexible nor clean. With the introduction of _Machine Mode Lock-down_ the ``pmpcfg.L`` bit distinguishes between rules that are *enforced* *only* in M-mode (_M-mode-only_) or *only* in S/U-modes (_S/U-mode-only_). The rule locking becomes part of the definition of an _M-mode-only_ rule, since when a rule is added in M mode, if not locked, can be modified or removed in a few instructions. On the other hand, S/U modes can’t modify PMP rules anyway so locking them doesn’t make sense.
+
+.. This separation between _M-mode-only_ and _S/U-mode-only_ rules also allows us to distinguish which regions are to be used by processes in Machine mode (``pmpcfg.L == 1``) and which by Supervisor or User mode processes (``pmpcfg.L == 0``), in the same way the U bit on the Virtual Memory’s PTEs marks which Virtual Memory pages are to be used by User mode applications (U=1) and which by the Supervisor / OS (U=0). With this distinction in place we are able to implement memory access and execution prevention in M-mode for any physical memory region that is not _M-mode-only_.
++
+An attacker that manages to tamper with a memory region used by S/U mode, even after successfully tricking a process running in M-mode to use or execute that region, will fail to perform a successful attack since that region will be _S/U-mode-only_ hence any access when in M-mode will trigger an access exception.
++
+[NOTE]
+====
+In order to support zero-copy transfers between M-mode and S/U-mode we need to either allow shared memory regions, or introduce a mechanism similar to the ``sstatus.SUM`` bit to temporarily allow the high-privileged mode (in this case M-mode) to be able to perform loads and stores on the region of a less-privileged process (in this case S/U-mode). In our case after discussion within the group it seemed a better idea to follow the first approach and have this functionality encoded on a per-rule basis to avoid the risk of leaving a temporary, global bypass active when exiting M-mode, hence rendering memory access prevention useless.
+====
++
+
+[NOTE]
+====
+Although it’s possible to use ``mstatus.MPRV`` in M-mode to read/write data on an _S/U-mode-only_ region using general purpose registers for copying, this will happen with S/U-mode permissions, honoring any MMU restrictions put in place by S-mode. Of course it’s still possible for M-mode to tamper with the page tables and / or add _S/U-mode-only_ rules and bypass the protections put in place by S-mode but if an attacker has managed to compromise M-mode to such extent, no security guarantees are possible in any way. *Also note that the threat model we present here assumes buggy software in M-mode, not compromised software*. We considered disabling ``mstatus.MPRV`` but it seemed too much and out of scope.
+====
++
+_Shared-region_ rules can be used both for zero-copy data transfers and for sharing code segments. The latter may be used for example to allow S/U-mode to execute code by the vendor, that makes use of some vendor-specific ISA extension, without having to go through the firmware with an ecall. This is similar to the vDSO approach followed on Linux, that allows userspace code to execute kernel code without having to perform a system call.
++
+To make sure that shared data regions can’t be executed and shared code regions can’t be modified, the encoding changes the meaning of the ``pmpcfg.X`` bit. In case of shared data regions, with the exception of the ``pmpcfg.LRWX=1111`` encoding, the ``pmpcfg.X`` bit marks the capability of S/U-mode to write to that region, so it’s not possible to encode an executable shared data region. In case of shared code regions, the ``pmpcfg.X`` bit marks the capability of M-mode to read from that region, and since ``pmpcfg.RW=01`` is used for encoding the shared region, it’s not possible to encode a shared writable code region.
++
+[NOTE]
+====
+For adding _Shared-region_ rules with executable privileges to share code segments between M-mode and S/U-mode, ``mseccfg.RLB`` needs to be implemented, or else such rules can only be added together with ``mseccfg.MML`` being set on *PMP Reset*. That's because the reserved encoding ``pmpcfg.RW=01`` being used for _Shared-region_ rules is only defined when ``mseccfg.MML`` is set, and 4b prevents the addition of rules with executable privileges on M-mode after ``mseccfg.MML`` is set unless ``mseccfg.RLB`` is also set.
+====
++
+[NOTE]
+====
+Using the ``pmpcfg.LRWX=1111`` encoding for a locked shared read-only data region was decided later on, its initial meaning was an M-mode-only read/write/execute region. The reason for that change was that the already defined shared data regions were not locked, so r/w access to M-mode couldn’t be restricted. In the same way we have execute-only shared code regions for both modes, it was decided to also be able to allow a least-privileged shared data region for both modes. This approach allows for example to share the .text section of an ELF with a shared code region and the .rodata section with a locked shared data region, without allowing M-mode to modify .rodata. We also decided that having a locked read/write/execute region in M-mode doesn’t make much sense and could be dangerous, since M-mode won’t be able to add further restrictions there (as in the case of S/U-mode where S-mode can further limit access to an ``pmpcfg.LRWX=0111`` region through the MMU), leaving the possibility of modifying an executable region in M-mode open.
+====
++
+[NOTE]
+====
+For encoding Shared-region rules initially we used one of the two reserved bits on pmpcfg (bit 5) but in order to avoid allocating an extra bit, since those bits are a very limited resource, it was decided to use the reserved R=0,W=1 combination.
+====
+.. The idea with this restriction is that after the Firmware or the OS running in M-mode is initialized and ``mseccfg.MML`` is set, no new code regions are expected to be added since nothing else is expected to run in M-mode (everything else will run in S/U mode). Since we want to limit the attack surface of the system as much as possible, it makes sense to disallow any new code regions which may include malicious code, to be added/executed in M-mode.
+
+.. In case ``mseccfg.MMWP`` is not set, M-mode can still access and execute any region not covered by a PMP rule. Since we try to prevent M-mode from executing malicious code and since an attacker may manage to place code on some region not covered by PMP (e.g. a directly-addressable flash memory), we need to ensure that M-mode can only execute the code segments initialized during firmware / OS initialization.
+
+.. We are only using the encoding ``pmpcfg.RW=01`` together with ``mseccfg.MML``; if ``mseccfg.MML`` is not set the encoding remains usable for future use.
+
diff --git a/src/smstateen.adoc b/src/smstateen.adoc
new file mode 100644
index 0000000..f524581
--- /dev/null
+++ b/src/smstateen.adoc
@@ -0,0 +1,406 @@
+[[smstateen]]
+== "Smstateen" State Enable Extension, Version 1.0.0
+
+=== Motivation
+
+The implementation of optional RISC-V extensions has the potential to open
+covert channels between separate user threads, or between separate guest OSes
+running under a hypervisor. The problem occurs when an extension adds processor
+state---usually explicit registers, but possibly other forms of state---that
+the main OS or hypervisor is unaware of (and hence won't context-switch) but
+that can be modified/written by one user thread or guest OS and
+perceived/examined/read by another.
+
+For example, the proposed Advanced Interrupt Architecture (AIA) for RISC-V adds
+to a hart as many as ten supervisor-level CSRs (`siselect`, `sireg`, `stopi`,
+`sseteipnum`, `sclreipnum`, `sseteienum`, `sclreienum`, `sclaimei`, `sieh`, and `siph`) and
+provides also the option for hardware to be backward-compatible with older,
+pre-AIA software. Because an older hypervisor that is oblivious to the AIA will
+not know to swap any of the AIA's new CSRs on context switches, the registers may
+then be used as a covert channel between multiple guest OSes that run atop this
+hypervisor. Although traditional practices might consider such a communication
+channel harmless, the intense focus on security today argues that a means be
+offered to plug such channels.
+
+The `f` registers of the RISC-V floating-point extensions and the `v` registers of
+the vector extension would similarly be potential covert channels between user
+threads, except for the existence of the FS and VS fields in the `sstatus`
+register. Even if an OS is unaware of, say, the vector extension and its `v`
+registers, access to those registers is blocked when the VS field is
+initialized to zero, either at machine level or by the OS itself initializing
+`sstatus`.
+
+Obviously, one way to prevent the use of new user-level CSRs as covert channels
+would be to add to `mstatus` or `sstatus` an "XS" field for each relevant
+extension, paralleling the V extension's VS field. However, this is not
+considered a general solution to the problem due to the number of potential
+future extensions that may add small amounts of state. Even with a 64-bit
+`sstatus` (necessitating adding `sstatush` for RV32), it is not certain there are
+enough remaining bits in `sstatus` to accommodate all future user-level
+extensions. In any event, there is no need to strain `sstatus` (and add `sstatush`)
+for this purpose. The "enable" flags that are needed to plug covert channels
+are not generally expected to require swapping on context switches of user
+threads, making them a less-than-compelling candidate for inclusion in `sstatus`.
+Hence, a new place is proposed for them instead.
+
+=== Proposal
+
+For RV64 harts, this extension adds four new 64-bit CSRs at machine level,
+listed with their CSR addresses:
+
+`0x30C mstateen0` (Machine State Enable 0)
+
+`0x30D mstateen1`
+
+`0x30E mstateen2`
+
+`0x30F mstateen3`
+
+If supervisor mode is implemented, another four CSRs are defined at supervisor
+level:
+
+`0x10C sstateen0`
+
+`0x10D sstateen1`
+
+`0x10E sstateen2`
+
+`0x10F sstateen3`
+
+And if the hypervisor extension is implemented, another set of CSRs is added:
+
+`0x60C hstateen0`
+
+`0x60D hstateen1`
+
+`0x60E hstateen2`
+
+`0x60F hstateen3`
+
+For RV32, the registers listed above are 32-bit, and for the machine-level and
+hypervisor CSRs there is a corresponding set of high-half CSRs for the upper 32
+bits of each register:
+
+`0x31C mstateen0h`
+
+`0x31D mstateen1h`
+
+`0x31E mstateen2h`
+
+`0x31F mstateen3h`
+
+`0x61C hstateen0h`
+
+`0x61D hstateen1h`
+
+`0x61E hstateen2h`
+
+`0x61F hstateen3h`
+
+For the supervisor-level `sstateen` registers, high-half CSRs are not added at
+this time because it is expected the upper 32 bits of these registers will
+always be zeros, as explained later below.
+
+Each bit of a `stateen` CSR controls less-privileged access to an extension's
+state, for an extension that was not deemed "worthy" of a full XS field in
+`sstatus` like the FS and VS fields for the F and V extensions. The number of
+registers provided at each level is four because it is believed that 4 * 64 =
+256 bits for machine and hypervisor levels, and 4 * 32 = 128 bits for
+supervisor level, will be adequate for many years to come, perhaps for as long
+as the RISC-V ISA is in use. The exact number four is an attempted compromise
+between providing too few bits on the one hand and going overboard with CSRs
+that will never be used on the other. A possible future doubling of the number
+of `stateen` CSRs is covered later.
+
+The `stateen` registers at each level control access to state at all
+less-privileged levels, but not at its own level. This is analogous to how the
+existing `counteren` CSRs control access to performance counter registers. Just
+as with the `counteren` CSRs, when a `stateen` CSR prevents access to state by
+less-privileged levels, an attempt in one of those privilege modes to execute
+an instruction that would read or write the protected state raises an illegal
+instruction exception, or, if executing in VS or VU mode and the circumstances
+for a virtual instruction exception apply, raises a virtual instruction
+exception instead of an illegal instruction exception.
+
+When this extension is not implemented, all state added by an extension is
+accessible as defined by that extension.
+
+When a `stateen` CSR prevents access to state for a privilege mode, attempting to
+execute in that privilege mode an instruction that _implicitly_ updates the
+state without reading it may or may not raise an illegal instruction or virtual
+instruction exception. Such cases must be disambiguated by being explicitly
+specified one way or the other.
+
+In some cases, the bits of the `stateen` CSRs will have a dual purpose as enables
+for the ISA extensions that introduce the controlled state.
+
+Each bit of a supervisor-level `sstateen` CSR controls user-level access (from
+U-mode or VU-mode) to an extension's state. The intention is to allocate the
+bits of `sstateen` CSRs starting at the least-significant end, bit 0, through to
+bit 31, and then on to the next-higher-numbered `sstateen` CSR.
+
+For every bit with a defined purpose in an `sstateen` CSR, the same bit is
+defined in the matching `mstateen` CSR to control access below machine level to
+the same state. The upper 32 bits of an `mstateen` CSR (or for RV32, the
+corresponding high-half CSR) control access to state that is inherently
+inaccessible to user level, so no corresponding enable bits in the
+supervisor-level `sstateen` CSR are applicable. The intention is to allocate bits
+for this purpose starting at the most-significant end, bit 63, through to bit
+32, and then on to the next-higher `mstateen` CSR. If the rate that bits are
+being allocated from the least-significant end for `sstateen` CSRs is
+sufficiently low, allocation from the most-significant end of `mstateen` CSRs may
+be allowed to encroach on the lower 32 bits before jumping to the next-higher
+`mstateen` CSR. In that case, the bit positions of "encroaching" bits will remain
+forever read-only zeros in the matching `sstateen` CSRs.
+
+With the hypervisor extension, the `hstateen` CSRs have identical encodings to
+the `mstateen` CSRs, except controlling accesses for a virtual machine (from VS
+and VU modes).
+
+Each standard-defined bit of a `stateen` CSR is WARL and may be read-only zero or
+one, subject to the following conditions.
+
+Bits in any `stateen` CSR that are defined to control state that a hart doesn't
+implement are read-only zeros for that hart. Likewise, all reserved bits not
+yet given a defined meaning are also read-only zeros. For every bit in an
+`mstateen` CSR that is zero (whether read-only zero or set to zero), the same bit
+appears as read-only zero in the matching `hstateen` and `sstateen` CSRs. For every
+bit in an `hstateen` CSR that is zero (whether read-only zero or set to zero),
+the same bit appears as read-only zero in `sstateen` when accessed in VS-mode.
+
+A bit in a supervisor-level `sstateen` CSR cannot be read-only one unless the
+same bit is read-only one in the matching `mstateen` CSR and, if it exists, in
+the matching `hstateen` CSR. A bit in an `hstateen` CSR cannot be read-only one
+unless the same bit is read-only one in the matching `mstateen` CSR.
+
+On reset, all writable `mstateen` bits are initialized by the hardware to zeros.
+If machine-level software changes these values, it is responsible for
+initializing the corresponding writable bits of the `hstateen` and `sstateen` CSRs
+to zeros too. Software at each privilege level should set its respective
+`stateen` CSRs to indicate the state it is prepared to allow less-privileged
+software to access. For OSes and hypervisors, this usually means the state that
+the OS or hypervisor is prepared to swap on a context switch, or to manage in
+some other way.
+
+For each `mstateen` CSR, bit 63 is defined to control access to the
+matching `sstateen` and `hstateen` CSRs.
+That is, bit 63 of `mstateen0` controls access to `sstateen0` and `hstateen0`;
+bit 63 of `mstateen1` controls access to `sstateen1` and `hstateen1`; etc.
+Likewise, bit 63 of each `hstateen` correspondingly controls access to
+the matching `sstateen` CSR.
+A hypervisor may need this control over
+accesses to the `sstateen` CSRs if it ever must emulate for a virtual machine an
+extension that is supposed to be affected by a bit in an `sstateen` CSR. (Even if
+such emulation is uncommon, it should not be excluded.) Machine-level software
+needs identical control to be able to emulate the hypervisor extension. (That
+is, machine level needs control over accesses to the supervisor-level `sstateen`
+CSRs in order to emulate the `hstateen` CSRs, which have such control.)
+
+Bit 63 of each `mstateen` CSR may be read-only zero only if the hypervisor
+extension is not implemented and the matching supervisor-level `sstateen` CSR is
+all read-only zeros. In that case, machine-level software should emulate
+attempts to access the affected `sstateen` CSR from S-mode, ignoring writes and
+returning zero for reads. Bit 63 of each `hstateen` CSR is always writable (not
+read-only).
+
+[wavedrom, ,svg]
+....
+{reg: [
+{bits: 1, name: 'C'},
+{bits: 1, name: 'FCSR'},
+{bits: 1, name: 'JVT'},
+{bits: 61, name: 'WPRI'}
+], config:{bits: 64, lanes: 4, hspace:1024}}
+....
+
+The C bit controls access to any and all custom state.
+
+[NOTE]
+====
+Bit 0 of these registers is not custom state itself; it is a standard field of
+a standard CSR, either mstateen0, hstateen0, or sstateen0. The
+requirements that non-standard extensions must meet to be conforming are not
+relaxed due solely to changes in the value of this bit. In particular, if
+software sets this bit but does not execute any custom instructions or access
+any custom state, the software must continue to execute as specified by all
+relevant RISC-V standards, or the hardware is not standard-conforming.
+The FCSR bit controls access to fcsr for the case when floating-point
+instructions operate on x registers instead of f registers as specified by
+the Zfinx and related extensions (Zdinx, etc.). Whenever misa.F = 1, bit 1 of
+mstateen0 is read-only zero (and hence read-only zero in hstateen0 and
+sstateen0 too). For convenience, when the stateen CSRs are implemented and
+misa.F = 0, then if bit 1 of a controlling stateen0 CSR is zero, all
+floating-point instructions cause an illegal instruction trap (or virtual
+instruction trap, if relevant), as though they all access fcsr, regardless of
+whether they really do.
+====
+
+The JVT bit controls access to the JVT CSR provided by the Zcmt extension.
+
+=== Machine State Enable Register (mstateen0)
+
+[wavedrom, ,svg]
+....
+{reg: [
+{bits: 1, name: 'C'},
+{bits: 1, name: 'FCSR'},
+{bits: 1, name: 'JVT'},
+{bits: 53, name: 'WPRI'},
+{bits: 1, name: 'P1P13'},
+{bits: 1, name: 'CONTEXT'},
+{bits: 1, name: 'IMSIC'},
+{bits: 1, name: 'AIA'},
+{bits: 1, name: 'CSRIND'},
+{bits: 1, name: 'WPRI'},
+{bits: 1, name: 'ENVCFG'},
+{bits: 1, name: 'SE0'},
+], config: {bits: 64, lanes: 4, hspace:1024}}
+....
+
+The C bit controls access to any and all custom state. The FCSR and the JVT
+bits control access to the same state as controlled by the same bits in the
+sstateen0 CSR.
+
+The SE0 bit in mstateen0 controls access to the hstateen0, hstateen0h,
+and the sstateen0 CSRs.
+
+The ENVCFG bit in mstateen0 controls access to the henvcfg, henvcfgh,
+and the senvcfg CSRs.
+
+The CSRIND bit in mstateen0 controls access to the siselect, sireg*,
+vsiselect, and the vsireg* CSRs provided by the Sscsrind extensions.
+
+The IMSIC bit in mstateen0 controls access to the IMSIC state, including
+CSRs stopei and vstopei, provided by the Ssaia extension.
+
+The AIA bit in mstateen0 controls access to all state introduced by the
+Ssaia extension that is not controlled by either the CSRIND or the IMSIC
+bits.
+
+The CONTEXT bit in mstateen0 controls access to the scontext and
+hcontext CSRs provided by the Sdtrig ISA extension.
+
+The P1P13 bit in mstateen0 controls access to the hedelegh CSR introduced by
+Privileged Specification Version 1.13.
+
+=== Hypervisor State Enable Register (hstateen0)
+
+[wavedrom, ,svg]
+....
+{reg: [
+{bits: 1, name: 'C'},
+{bits: 1, name: 'FCSR'},
+{bits: 1, name: 'JVT'},
+{bits: 54, name: 'WPRI'},
+{bits: 1, name: 'CONTEXT'},
+{bits: 1, name: 'IMSIC'},
+{bits: 1, name: 'AIA'},
+{bits: 1, name: 'CSRIND'},
+{bits: 1, name: 'WPRI'},
+{bits: 1, name: 'ENVCFG'},
+{bits: 1, name: 'SE0'},
+], config: {bits: 64, lanes: 4, hspace:1024}}
+....
+
+The C bit controls access to any and all custom state. The FCSR and the JVT
+bits control access to the same state as controlled by the same bits in the
+sstateen0 CSR.
+
+The SE0 bit in hstateen0 controls access to the sstateen0 CSR.
+
+The ENVCFG bit in hstateen0 controls access to the senvcfg CSR.
+The CSRIND bit in hstateen0 controls access to the siselect and the
+sireg* (really vsiselect and vsireg*) CSRs provided by the
+Sscsrind extensions.
+
+The IMSIC bit in hstateen0 controls access to the guest IMSIC state,
+including CSRs stopei (really vstopei), provided by the Ssaia extension.
+
+[NOTE]
+====
+Setting the IMSIC bit in hstateen0 to zero prevents a virtual machine from
+accessing the hart's IMSIC the same as setting hstatus.VGEIN = 0.
+The AIA bit in hstateen0 controls access to all state introduced by the
+Ssaia extension that is not controlled by either the CSRIND or the IMSIC
+bits of hstateen0.
+====
+
+The CONTEXT bit in hstateen0 controls access to the scontext CSR
+provided by the Sdtrig ISA extension.
+
+=== Usage
+
+After the writable bits of the machine-level `mstateen` CSRs are initialized to
+zeros on reset, machine-level software can set bits in these registers to
+enable less-privileged access to the controlled state. This may be either
+because machine-level software knows how to swap the state or, more likely,
+because machine-level software isn't swapping supervisor-level environments.
+(Recall that the main reason the `mstateen` CSRs must exist is so machine level
+can emulate the hypervisor extension. When machine level isn't emulating the
+hypervisor extension, it is likely there will be no need to keep any
+implemented `mstateen` bits zero.)
+
+If machine level sets any writable `mstateen` bits to nonzero, it must initialize
+the matching `hstateen` CSRs, if they exist, by writing zeros to them. And if any
+`mstateen` bits that are set to one have matching bits in the `sstateen` CSRs,
+machine-level software must also initialize those `sstateen` CSRs by writing
+zeros to them. Ordinarily, machine-level software will want to set bit 63 of
+all `mstateen` CSRs, necessitating that it write zero to all `hstateen` CSRs.
+
+Software should ensure that all writable bits of `sstateen` CSRs are initialized
+to zeros when an OS at supervisor level is first entered. The OS can then set
+bits in these registers to enable user-level access to the controlled state,
+presumably because it knows how to context-swap the state.
+
+For the `sstateen` CSRs whose access by a guest OS is permitted by bit 63 of the
+corresponding `hstateen` CSRs, a hypervisor must include the `sstateen` CSRs in the
+context it swaps for a guest OS. When it starts a new guest OS, it must ensure
+the writable bits of those `sstateen` CSRs are initialized to zeros, and it must
+emulate accesses to any other `sstateen` CSRs.
+
+If software at any privilege level does not support multiple contexts for
+less-privileged levels, then it may choose to maximize less-privileged access to
+all state by writing a value of all ones to the `stateen` CSRs at its level (the
+`mstateen` CSRs for machine level, the `sstateen` CSRs for an OS, and the `hstateen`
+CSRs for a hypervisor), without knowing all the state to which it is granting
+access. This is justified because there is no risk of a covert channel between
+execution contexts at the less-privileged level when only one context exists
+at that level. This situation is expected to be common for machine level, and
+it might also arise, for example, for a type-1 hypervisor that hosts only a
+single guest virtual machine.
+
+=== Possible expansion
+
+If a need is anticipated, the set of `stateen` CSRs could in the future be
+doubled by adding these:
+
+`0x38C mstateen4` `0x39C mstateen4h`
+
+`0x38D mstateen5` `0x39D mstateen5h`
+
+`0x38E mstateen6` `0x39E mstateen6h`
+
+`0x38F mstateen7` `0x39F mstateen7h`
+
+`0x18C sstateen4`
+
+`0x18D sstateen5`
+
+`0x18E sstateen6`
+
+`0x18F sstateen7`
+
+`0x68C hstateen4` `0x69C hstateen4h`
+
+`0x68D hstateen5` `0x69D hstateen5h`
+
+`0x68E hstateen6` `0x69E hstateen6h`
+
+`0x68F hstateen7` `0x69F hstateen7h`
+
+These additional CSRs are not a definite part of the original proposal because
+it is unclear whether they will ever be needed, and it is believed the rate of
+consumption of bits in the first group, registers numbered 0-3, will be slow
+enough that any looming shortage will be perceptible many years in advance. At
+the moment, it is not known even how many years it may take to exhaust just
+`mstateen0`, `sstateen0`, and `hstateen0`. \ No newline at end of file
diff --git a/src/sscofpmt.adoc b/src/sscofpmt.adoc
new file mode 100644
index 0000000..101c15f
--- /dev/null
+++ b/src/sscofpmt.adoc
@@ -0,0 +1,189 @@
+[[Sscofpmf]]
+== "Sscofpmf" Count Overflow and Mode-Based Filtering Extension, Version 1.0.0
+
+The current Privileged specification defines mhpmevent CSRs to select and
+control event counting by the associated hpmcounter CSRs, but provides no
+standardization of any fields within these CSRs. For at least Linux-class
+rich-OS systems it is desirable to standardize certain basic features that are
+broadly desired (and have come up over the past year plus on RISC-V lists, as
+well as have been the subject of past proposals). This enables there to be
+standard upstream software support that eliminates the need for implementations
+to provide their own custom software support.
+
+This extension serves to accomplish exactly this within the existing mhpmevent
+CSRs (and correspondingly avoids the unnecessary creation of whole new sets of
+CSRs - past just one new CSR).
+
+This extension sticks to addressing two basic well-understood needs that have
+been requested by various people. To make it easy to understand the deltas from
+the current Priv 1.11/1.12 specs, this is written as the actual exact changes
+to be made to existing paragraphs of Priv spec text (or additional paragraphs
+within the existing text).
+
+The extension name is "Sscofpmf" ('Ss' for Privileged arch and Supervisor-level
+extensions, and 'cofpmf' for Count OverFlow and Privilege Mode Filtering).
+
+Note that the new count overflow interrupt will be treated as a standard local
+interrupt that is assigned to bit 13 in the mip/mie/sip/sie registers.
+
+=== Machine Level Additions
+
+==== Hardware Performance Monitor
+
+This extension expands the hardware performance monitor description and extends
+the mhpmevent registers to 64 bits (in RV32) as follows:
+
+The hardware performance monitor includes 29 additional 64-bit event counters and 29 associated 64-bit event selector registers - the mhpmcounter3–mhpmcounter31 and mhpmevent3–mhpmevent31 CSRs.
+
+The mhpmcounters are WARL registers that support up to 64 bits of precision on
+RV32 and RV64.
+
+The mhpmevent__n__ registers are WARL registers that control which event causes
+the corresponding counter to increment and what happens when the corresponding
+count overflows. Currently just a few bits are defined here. Past this, the
+actual selection and meaning of events is defined by the platform, but
+(mhpmevent == 0) is defined to mean "no event" and that the corresponding
+counter will never be incremented. Typically the lower bits of mhpmevent will
+be used for event selection purposes.
+
+On RV32 only, accesses to the mcycle, minstret, mhpmcounter__n__, and
+mhpmevent__n__ CSRs access the low 32 bits, while accesses to the mcycleh,
+minstreth, mhpmcounter__n__h, and mhpmevent__n__h CSRs access bits 63–32 of the
+corresponding counter or event selector. The proposed CSR numbers for
+mhpmevent__n__h are 0x723 - 0x73F.
+
+The following bits are added to mhpmevent:
+
+bit [63] +++OF+++ - Overflow status and interrupt disable bit that is set when counter overflows
+
+bit [62] +++MINH+++ - If set, then counting of events in M-mode is inhibited
+
+bit [61] +++SINH+++ - If set, then counting of events in S/HS-mode is inhibited
+
+bit [60] +++UINH+++ - If set, then counting of events in U-mode is inhibited
+
+bit [59] +++VSINH+++ - If set, then counting of events in VS-mode is inhibited
+
+bit [58] +++VUINH+++ - If set, then counting of events in VU-mode is inhibited
+
+bit [57] 0 - Reserved for possible future modes
+
+bit [56] 0 - Reserved for possible future modes
+
+Each of the five ``x``INH bits, when set, inhibits counting of events while in
+privilege mode ``x``. All-zeroes for these bits results in counting of events in
+all modes.
+
+The OF bit is set when the corresponding hpmcounter overflows, and remains set
+until written by software. Since hpmcounter values are unsigned values,
+overflow is defined as unsigned overflow of the implemented counter bits. Note
+that there is no loss of information after an overflow since the counter wraps
+around and keeps counting while the sticky OF bit remains set.
+
+If supervisor mode is implemented, the 32-bit scountovf register contains
+read-only shadow copies of the OF bits in all 29 mhpmevent registers.
+
+If an hpmcounter overflows while the associated OF bit is zero, then a "count
+overflow interrupt request" is generated. If the OF bit is one, then no
+interrupt request is generated. Consequently the OF bit also functions as a
+count overflow interrupt disable for the associated hpmcounter.
+
+Count overflow never results from writes to the mhpmcounter__n__ or
+mhpmevent__n__ registers, only from hardware increments of counter registers.
+
+This "count overflow interrupt request" signal is treated as a standard local
+interrupt that corresponds to bit 13 in the mip/mie/sip/sie registers. The
+mip/sip LCOFIP and mie/sie LCOFIE bits are respectively the interrupt-pending
+and interrupt-enable bits for this interrupt. ('LCOFI' represents 'Local Count
+Overflow Interrupt'.)
+
+Generation of a "count overflow interrupt request" by an hpmcounter sets the
+LCOFIP bit in the mip/sip registers and sets the associated OF bit. The mideleg
+register controls the delegation of this interrupt to S-mode versus M-mode. The
+LCOFIP bit is cleared by software before servicing the count overflow interrupt
+resulting from one or more count overflows.
+
+[NOTE]
+.Non-normative
+====
+There are not separate overflow status and overflow interrupt enable bits. In
+practice, enabling overflow interrupt generation (by clearing the OF bit) is
+done in conjunction with initializing the counter to a starting value. Once a
+counter has overflowed, it and the OF bit must be reinitialized before another
+overflow interrupt can be generated.
+====
+
+[NOTE]
+.Non-normative
+====
+Software can distinguish newly overflowed counters (yet to be serviced by an
+overflow interrupt handler) from overflowed counters that have already been
+serviced or that are configured to not generate an interrupt on overflow, by
+maintaining a bit mask reflecting which counters are active and due to
+eventually overflow.
+====
+
+==== Machine Interrupt Registers (mip and mie)
+
+This extension adds the description of the LCOFIP/LCOFIE bits in these
+registers (and modifies related text) as follows:
+
+LCOFIP is added to mip in <<mipreg-standard>> as bit 13. LCOFIE is added to mie in
+<<miereg-standard>> as bit 13.
+
+If the Sscofpmf extension is implemented, bits mip.LCOFIP and mie.LCOFIE are
+the interrupt-pending and interrupt-enable bits for local count overflow
+interrupts. LCOFIP is read-write in mip and reflects the occurrence of a local
+count overflow interrupt request resulting from any of the mhpmevent__n__.OF
+bits being set. If the Sscofpmf extension is not implemented, these LCOFIP and
+LCOFIE bits are hardwired to zeros.
+
+Multiple simultaneous interrupts destined for different privilege modes are
+handled in decreasing order of destined privilege mode. Multiple simultaneous
+interrupts destined for the same privilege mode are handled in the following
+decreasing priority order: MEI, MSI, MTI, SEI, SSI, STI, LCOFI.
+
+=== Supervisor Level Additions
+
+==== Supervisor Interrupt Registers (sip and sie)
+
+This extension adds the description of the LCOFIP/LCOFIE bits in these
+registers (and modifies related text) as follows:
+
+LCOFIP is added to sip in <<sipreg-standard>> as bit 13. LCOFIE is added to sie in
+<<siereg-standard>> as bit 13.
+
+If the Sscofpmf extension is implemented, bits sip.LCOFIP and sie.LCOFIE are
+the interrupt-pending and interrupt-enable bits for local count overflow
+interrupts. LCOFIP is read-write in sip and reflects the occurrence of a local
+count overflow interrupt request resulting from any of the mhpmevent__n__.OF
+bits being set. If the Sscofpmf extension is not implemented, these LCOFIP and
+LCOFIE bits are hardwired to zeros.
+
+Each standard interrupt type (LCOFI, SEI, STI, or SSI) may not be implemented,
+in which case the corresponding interrupt-pending and interrupt-enable bits are
+hardwired to zeros. All bits in sip and sie are WARL fields.
+
+Multiple simultaneous interrupts destined for supervisor mode are handled in
+the following decreasing priority order: SEI, SSI, STI, LCOFI.
+
+==== Supervisor Count Overflow (scountovf)
+
+This extension adds this new CSR.
+
+The scountovf CSR is a 32-bit read-only register that contains shadow copies of
+the OF bits in the 29 mhpmevent CSRs (mhpmevent__3__ - mhpmevent__31__) - where
+scountovf bit _X_ corresponds to mhpmevent__X__. The proposed CSR number is
+0xDA0.
+
+This register enables supervisor-level overflow interrupt handler software to
+quickly and easily determine which counter(s) have overflowed (without needing
+to make an execution environment call or series of calls ultimately up to
+M-mode).
+
+Read access to bit _X_ is subject to the same mcounteren (or mcounteren and
+hcounteren) CSRs that mediate access to the hpmcounter CSRs by S-mode (or
+VS-mode). In M and S modes, scountovf bit _X_ is readable when mcounteren bit
+_X_ is set, and otherwise reads as zero. Similarly, in VS mode, scountovf bit
+_X_ is readable when mcounteren bit _X_ and hcounteren bit _X_ are both set,
+and otherwise reads as zero. \ No newline at end of file
diff --git a/src/sstc.adoc b/src/sstc.adoc
new file mode 100644
index 0000000..8e7a8e7
--- /dev/null
+++ b/src/sstc.adoc
@@ -0,0 +1,190 @@
+[[Sstc]]
+== "Stimecmp/Vstimecmp" Extension, Version 1.0.0
+
+The current Privileged arch specification only defines a hardware mechanism for
+generating machine-mode timer interrupts (based on the mtime and mtimecmp
+registers). As a result, timer services for
+S-mode/HS-mode (and for VS-mode) all have to be provided by M-mode - via SBI
+calls from S/HS-mode up to M-mode (or VS-mode calls to HS-mode and then to
+M-mode). M-mode software then multiplexes these multiple logical timers onto
+its one physical M-mode timer facility, and the M-mode timer interrupt handler
+passes timer interrupts back down to the appropriate lower privilege mode.
+
+This extension serves to provide supervisor mode with its own CSR-based timer
+interrupt facility that it can directly manage to provide its own timer service
+(in the form of having its own stimecmp register) - thus eliminating the large
+overheads for emulating S/HS-mode timers and timer interrupt generation up in
+M-mode. Further, this extension adds a similar facility to the Hypervisor
+extension for VS-mode.
+
+To make it easy to understand the deltas from the current Priv 1.11/1.12 specs,
+this is written as the actual exact changes to be made to existing paragraphs
+of Priv spec text (or additional paragraphs within the existing text).
+
+The extension name is "Sstc" ('Ss' for Privileged arch and Supervisor-level
+extensions, and 'tc' for timecmp). This extension adds the S-level stimecmp CSR
+and the VS-level vstimecmp CSR.
+
+=== Machine and Supervisor Level Additions
+
+==== *Supervisor Timer Register (stimecmp)*
+
+This extension adds this new CSR.
+
+The stimecmp CSR is a 64-bit register and has 64-bit precision on all RV32 and
+RV64 systems. In RV32 only, accesses to the stimecmp CSR access the low 32
+bits, while accesses to the stimecmph CSR access the high 32 bits of stimecmp.
+
+The CSR numbers for stimecmp / stimecmph are 0x14D / 0x15D (within the
+Supervisor Trap Setup block of CSRs).
+
+A supervisor timer interrupt becomes pending - as reflected in the STIP bit in
+the mip and sip registers - whenever time contains a value greater than or
+equal to stimecmp, treating the values as unsigned integers. Writes to stimecmp
+are guaranteed to be reflected in STIP eventually, but not necessarily
+immediately. The interrupt remains posted until stimecmp becomes greater than
+time - typically as a result of writing stimecmp. The interrupt will be taken
+based on the standard interrupt enable and delegation rules.
+
+[NOTE]
+.Non-normative
+====
+A spurious timer interrupt might occur if an interrupt handler advances
+stimecmp then immediately returns, because STIP might not yet have fallen in
+the interim. All software should be written to assume this event is possible,
+but most software should assume this event is extremely unlikely. It is almost
+always more performant to incur an occasional spurious timer interrupt than to
+poll STIP until it falls.
+====
+
+[NOTE]
+.Non-normative
+====
+In systems in which a supervisor execution environment (SEE) provides timer
+facilities via an SBI function call, this SBI call will continue to support
+requests to schedule a timer interrupt. The SEE will simply make use of
+stimecmp, changing its value as appropriate. This ensures compatibility with
+existing S-mode software that uses this SEE facility, while new S-mode software
+takes advantage of stimecmp directly.
+====
+
+==== Machine Interrupt Registers (mip and mie)
+
+This extension modifies the description of the STIP/STIE bits in these
+registers as follows:
+
+If supervisor mode is implemented, its mip.STIP and mie.STIE are the
+interrupt-pending and interrupt-enable bits for supervisor-level timer
+interrupts. If the stimecmp register is not implemented, STIP is writable in
+mip, and may be written by M-mode software to deliver timer interrupts to
+S-mode. If the stimecmp (supervisor-mode timer compare) register is
+implemented, STIP is read-only in mip and reflects the supervisor-level timer
+interrupt signal resulting from stimecmp. This timer interrupt signal is
+cleared by writing stimecmp with a value greater than the current time value.
+
+==== Supervisor Interrupt Registers (sip and sie)
+
+This extension modifies the description of the STIP/STIE bits in these
+registers as follows:
+
+Bits sip.STIP and sie.STIE are the interrupt-pending and interrupt-enable bits
+for supervisor level timer interrupts. If implemented, STIP is read-only in
+sip, and is either set and cleared by the execution environment (if stimecmp is
+not implemented), or reflects the timer interrupt signal resulting from
+stimecmp (if stimecmp is implemented). The sip.STIP bit, in response to timer
+interrupts generated by stimecmp, is set and cleared by writing stimecmp with a
+value that respectively is less than or equal to, or greater than, the current
+time value.
+
+==== Machine Counter-Enable Register (mcounteren)
+
+This extension adds to the description of the TM bit in this register as
+follows:
+
+In addition, when the TM bit in the mcounteren register is clear, attempts to
+access the stimecmp or vstimecmp register while executing in a mode less
+privileged than M will cause an illegal instruction exception. When this bit
+is set, access to the stimecmp or vstimecmp register is permitted in S-mode if
+implemented, and access to the vstimecmp register (via stimecmp) is permitted
+in VS-mode if implemented and not otherwise prevented by the TM bit in
+hcounteren.
+
+=== Hypervisor Extension Additions
+
+==== *Virtual Supervisor Timer Register (vstimecmp)*
+
+This extension adds this new CSR.
+
+The vstimecmp CSR is a 64-bit register and has 64-bit precision on all RV32 and
+RV64 systems. In RV32 only, accesses to the vstimecmp CSR access the low 32
+bits, while accesses to the vstimecmph CSR access the high 32 bits of
+vstimecmp.
+
+The proposed CSR numbers for vstimecmp / vstimecmph are 0x24D / 0x25D (within
+the Virtual Supervisor Registers block of CSRs, and mirroring the CSR numbers
+for stimecmp/stimecmph).
+
+A virtual supervisor timer interrupt becomes pending - as reflected in the
+VSTIP bit in the hip register - whenever (time + htimedelta), truncated to 64
+bits, contains a value greater than or equal to vstimecmp, treating the values
+as unsigned integers. Writes to vstimecmp and htimedelta are guaranteed to be
+reflected in VSTIP eventually, but not necessarily immediately. The interrupt
+remains posted until vstimecmp becomes greater than (time + htimedelta) -
+typically as a result of writing vstimecmp. The interrupt will be taken based
+on the standard interrupt enable and delegation rules while V=1.
+
+[NOTE]
+.Non-normative
+====
+In systems in which a supervisor execution environment (SEE) implemented by an
+HS-mode hypervisor provides timer facilities via an SBI function call, this SBI
+call will continue to support requests to schedule a timer interrupt. The SEE
+will simply make use of vstimecmp, changing its value as appropriate. This
+ensures compatibility with existing guest VS-mode software that uses this SEE
+facility, while new VS-mode software takes advantage of vstimecmp directly.
+====
+
+==== Hypervisor Interrupt Registers (hvip, hip, and hie)
+
+This extension modifies the description of the VSTIP/VSTIE bits in the hip/hie
+registers as follows:
+
+Bits hip.VSTIP and hie.VSTIE are the interrupt-pending and interrupt-enable
+bits for VS-level timer interrupts. VSTIP is read-only in hip, and is the
+logical-OR of hvip.VSTIP and the timer interrupt signal resulting from
+vstimecmp (if vstimecmp is implemented). The hip.VSTIP bit, in response to
+timer interrupts generated by vstimecmp, is set and cleared by writing
+vstimecmp with a value that respectively is less than or equal to, or greater
+than, the current (time + htimedelta) value. The hip.VSTIP bit remains defined
+while V=0 as well as V=1.
+
+==== Hypervisor Counter-Enable Register (hcounteren)
+
+This extension adds to the description of the TM bit in this register as
+follows:
+
+In addition, when the TM bit in the hcounteren register is clear, attempts to
+access the vstimecmp register (via stimecmp) while executing in VS-mode will
+cause a virtual instruction exception if the same bit in mcounteren is set.
+When this bit and the same bit in mcounteren are both set, access to the
+vstimecmp register (if implemented) is permitted in VS-mode.
+
+=== Environment Config (menvcfg/henvcfg) Support
+
+Enable/disable bits for this extension are provided in the new menvcfg /
+henvcfg CSRs.
+
+Bit 63 of menvcfg (or bit 31 of menvcfgh) - named STCE (STimecmp Enable) -
+enables stimecmp for S-mode when set to one, and the same bit of henvcfg
+enables vstimecmp for VS-mode. These STCE bits are WARL and are hard-wired to 0
+when this extension is not implemented.
+
+When STCE in menvcfg is zero, an attempt to access stimecmp or vstimecmp in a
+mode other than M-mode raises an illegal instruction exception, STCE in henvcfg
+is read-only zero, and STIP in mip and sip reverts to its defined behavior as
+if this extension is not implemented.
+
+When STCE in menvcfg is one but STCE in henvcfg is zero, an attempt to access
+stimecmp (really vstimecmp) when V = 1 raises a virtual instruction exception,
+and VSTIP in hip reverts to its defined behavior as if this extension is not
+implemented. \ No newline at end of file
diff --git a/src/supervisor.adoc b/src/supervisor.adoc
index 2a376d6..e9f2855 100644
--- a/src/supervisor.adoc
+++ b/src/supervisor.adoc
@@ -1,5 +1,5 @@
[[supervisor]]
-== Supervisor-Level ISA, Version 1.12
+== Supervisor-Level ISA, Version 1.13
This chapter describes the RISC-V supervisor-level architecture, which
contains a common core that is used with various supervisor-level
@@ -103,7 +103,7 @@ destination register.
If UXLEN latexmath:[$<$] SXLEN, user-mode instruction-fetch addresses
and load and store effective addresses are taken modulo
latexmath:[$2^{\text{UXLEN}}$]. For example, when UXLEN=32 and SXLEN=64,
-user-mode memory accesses reference the lowest of the address space.
+user-mode memory accesses reference the lowest 4 GiB of the address space.
[[sum]]
===== Memory Privilege in `sstatus` Register
@@ -225,7 +225,7 @@ SXLEN-bit read/write register containing interrupt enable bits.
Interrupt cause number _i_ (as reported in CSR `scause`,
<<scause>>) corresponds with bit _i_ in both `sip` and
`sie`. Bits 15:0 are allocated to standard interrupt causes only, while
-bits 16 and above are designated for platform or custom use.
+bits 16 and above are designated for platform use.
.Supervisor interrupt-pending register (`sip`).
include::images/bytefield/sip.edn[]
@@ -263,11 +263,11 @@ formatted as shown in Figures <<sipreg-standard>>
and <<siereg-standard>> respectively.
[[sipreg-standard]]
-.Standard portion (bits 15:0)of `sip`.
+.Standard portion (bits 15:0) of `sip`.
include::images/bytefield/sipreg-standard.edn[]
[[siereg-standard]]
-.Statndard portion (bits 15:0)of `sie`.
+.Standard portion (bits 15:0) of `sie`.
include::images/bytefield/siereg-standard.edn[]
@@ -287,6 +287,15 @@ interrupt-enable bits for supervisor-level software interrupts. If
implemented, SSIP is writable in `sip` and may also be set to 1 by a
platform-specific interrupt controller.
+If the Sscofpmf extension is implemented, bits `sip`.LCOFIP and `sie`.LCOFIE
+are the interrupt-pending and interrupt-enable bits for local counter-overflow
+interrupts.
+LCOFIP is read-write in `sip` and reflects the occurrence of a local
+counter-overflow interrupt request resulting from any of the
+`mhpmevent__n__`.OF bits being set.
+If the Sscofpmf extension is not implemented, `sip`.LCOFIP and `sie`.LCOFIE are
+read-only zeros.
+
[NOTE]
====
Interprocessor interrupts are sent to other harts by
@@ -294,7 +303,7 @@ implementation-specific means, which will ultimately cause the SSIP bit
to be set in the recipient hart’s `sip` register.
====
-Each standard interrupt type (SEI, STI, or SSI) may not be implemented,
+Each standard interrupt type (SEI, STI, SSI, or LCOFI) may not be implemented,
in which case the corresponding interrupt-pending and interrupt-enable
bits are read-only zeros. All bits in `sip` and `sie` are *WARL* fields. The
implemented interrupts may be found by writing one to every bit location
@@ -315,7 +324,7 @@ M-mode to S-mode, they are shown as 0 in
====
Multiple simultaneous interrupts destined for supervisor mode are
-handled in the following decreasing priority order: SEI, SSI, STI.
+handled in the following decreasing priority order: SEI, SSI, STI, LCOFI.
==== Supervisor Timers and Performance Counters
@@ -336,9 +345,9 @@ The counter-enable register `scounteren` is a 32-bit register that
controls the availability of the hardware performance monitoring
counters to U-mode.
-When the CY, TM, IR, or HPM_n_ bit in the `scounteren` register is
+When the CY, TM, IR, or HPM__n__ bit in the `scounteren` register is
clear, attempts to read the `cycle`, `time`, `instret`, or `hpmcountern`
-register while executing in U-mode will cause an illegal instruction
+register while executing in U-mode will cause an illegal-instruction
exception. When one of these bits is set, access to the corresponding
register is permitted.
@@ -425,6 +434,8 @@ include::images/bytefield/scausereg.edn[]
1 +
1 +
1 +
+1 +
+1 +
1
|0 +
1 +
@@ -432,7 +443,9 @@ include::images/bytefield/scausereg.edn[]
5 +
6-8 +
9 +
-10-15 +
+10-12 +
+13 +
+14-15 +
&#8805;16
|_Reserved_ +
Supervisor software interrupt +
@@ -441,6 +454,8 @@ Supervisor timer interrupt +
_Reserved_ +
Supervisor external interrupt +
_Reserved_ +
+Counter-overflow interrupt +
+_Reserved_ +
_Designated for platform use_
|0 +
@@ -478,7 +493,10 @@ _Designated for platform use_
13 +
14 +
15 +
-16-23 +
+16-17 +
+18 +
+19 +
+20-23 +
24-31 +
32-47 +
48-63 +
@@ -499,6 +517,9 @@ Load page fault +
_Reserved_ +
Store/AMO page fault +
_Reserved_ +
+Software check +
+Hardware error +
+_Reserved_ +
_Designated for custom use_ +
_Reserved_ +
_Designated for custom use_ +
@@ -513,7 +534,9 @@ S-mode, `stval` is written with exception-specific information to assist
software in handling the trap. Otherwise, `stval` is never written by
the implementation, though it may be explicitly written by software. The
hardware platform will specify which exceptions must set `stval`
-informatively and which may unconditionally set it to zero.
+informatively, which may unconditionally set it to zero, and which may
+exhibit either behavior, depending on the underlying event that caused the
+exception.
If `stval` is written with a nonzero value when a breakpoint,
address-misaligned, access-fault, or page-fault exception occurs on an
@@ -536,7 +559,7 @@ address of the portion of the instruction that caused the fault, while
`sepc` will point to the beginning of the instruction.
The `stval` register can optionally also be used to return the faulting
-instruction bits on an illegal instruction exception (`sepc` points to
+instruction bits on an illegal-instruction exception (`sepc` points to
the faulting instruction in memory). If `stval` is written with a
nonzero value when an illegal-instruction exception occurs, then `stval`
will contain the shortest of:
@@ -643,6 +666,10 @@ The definitions of the CBCFE and CBIE fields will be furnished by the
forthcoming Zicbom extension. Their allocations within `senvcfg` may
change prior to the ratification of that extension.
+The definition of the PMM field will be furnished by the forthcoming
+Ssnpm extension. Its allocation within `senvcfg` may change prior to the
+ratification of that extension.
+
[[satp]]
==== Supervisor Address Translation and Protection (`satp`) Register
@@ -651,7 +678,7 @@ shown in <<rv32satp>> for SXLEN=32 and
<<rv64satp>> for SXLEN=64, which controls
supervisor-mode address translation and protection. This register holds
the physical page number (PPN) of the root page table, i.e., its
-supervisor physical address divided by ; an address space identifier
+supervisor physical address divided by 4 KiB; an address space identifier
(ASID), which facilitates address-translation fences on a
per-address-space basis; and the MODE field, which selects the current
address-translation scheme. Further details on the access to this
@@ -673,7 +700,7 @@ corresponding to main memory be representable.
====
[[rv64satp]]
-.Supervisor address translation and protection register `satp` when SXLEN=64, for MODE values Bare, Sv39, Sv38, and Sv57.
+.Supervisor address translation and protection register `satp` when SXLEN=64, for MODE values Bare, Sv39, Sv48, and Sv57.
include::images/bytefield/rv64satp.edn[]
[NOTE]
@@ -725,7 +752,7 @@ Implementations are not required to support all MODE settings, and if
`satp` is written with an unsupported MODE, the entire write has no
effect; no fields in `satp` are modified.
-The number of ASID bits is  and may be zero. The number of implemented
+The number of ASID bits is UNSPECIFIED and may be zero. The number of implemented
ASID bits, termed _ASIDLEN_, may be determined by writing one to every
bit position in the ASID field, then reading back the value in `satp` to
see which bit positions in the ASID field hold a one. The
@@ -882,20 +909,20 @@ The behavior of SFENCE.VMA depends on _rs1_ and _rs2_ as follows:
made to any level of the page tables, for all address spaces. The fence
also invalidates all address-translation cache entries, for all address
spaces.
-* If __rs1__=`x0` and __rs2__&#8805;``x0``, the fence orders all
+* If __rs1__=`x0` and __rs2__&#8800;``x0``, the fence orders all
reads and writes made to any level of the page tables, but only for the
address space identified by integer register _rs2_. Accesses to _global_
mappings (see <<translation>>) are not ordered. The
fence also invalidates all address-translation cache entries matching
the address space identified by integer register _rs2_, except for
entries containing global mappings.
-* If __rs1__&#8805;``x0`` and __rs2__=`x0`, the fence orders only
+* If __rs1__&#8800;``x0`` and __rs2__=`x0`, the fence orders only
reads and writes made to leaf page table entries corresponding to the
virtual address in __rs1__, for all address spaces. The fence also
invalidates all address-translation cache entries that contain leaf page
table entries corresponding to the virtual address in _rs1_, for all
address spaces.
-* If __rs1__&#8805;``x0`` and __rs2__&#8805;``x0``, the
+* If __rs1__&#8800;``x0`` and __rs2__&#8800;``x0``, the
fence orders only reads and writes made to leaf page table entries
corresponding to the virtual address in _rs1_, for the address space
identified by integer register _rs2_. Accesses to global mappings are
@@ -908,7 +935,7 @@ If the value held in _rs1_ is not a valid virtual address, then the
SFENCE.VMA instruction has no effect. No exception is raised in this
case.
-When __rs2__&#8805;``x0``, bits SXLEN-1:ASIDMAX of the value held
+When __rs2__&#8800;``x0``, bits SXLEN-1:ASIDMAX of the value held
in _rs2_ are reserved for future standard use. Until their use is
defined by a standard extension, they should be zeroed by software and
ignored by current implementations. Furthermore, if
@@ -1031,8 +1058,8 @@ attractive for its simplicity and possibly better scalability.
====
For implementations that make `satp`.MODE read-only zero (always Bare),
-attempts to execute an SFENCE.VMA instruction might raise an illegal
-instruction exception.
+attempts to execute an SFENCE.VMA instruction might raise an
+illegal-instruction exception.
[[sv32]]
=== Sv32: Page-Based 32-bit Virtual-Memory Systems
@@ -1202,7 +1229,7 @@ either mapping being used.
Global mappings need not be stored redundantly in address-translation
caches for multiple ASIDs. Additionally, they need not be flushed from
local address-translation caches when an SFENCE.VMA instruction is
-executed with __rs2__&#8805;``x0``.
+executed with __rs2__&#8800;``x0``.
====
The RSW field is reserved for use by supervisor software; the
@@ -1213,34 +1240,68 @@ indicates the virtual page has been read, written, or fetched from since
the last time the A bit was cleared. The D bit indicates the virtual
page has been written since the last time the D bit was cleared.
-Two schemes to manage the A and D bits are permitted:
-
-* When a virtual page is accessed and the A bit is clear, or is written
-and the D bit is clear, a page-fault exception is raised.
-* When a virtual page is accessed and the A bit is clear, or is written
-and the D bit is clear, the implementation sets the corresponding bit(s)
-in the PTE. The PTE update must be atomic with respect to other accesses
-to the PTE, and must atomically check that the PTE is valid and grants
-sufficient permissions. Updates of the A bit may be performed as a
-result of speculation, but updates to the D bit must be exact (i.e., not
-speculative), and observed in program order by the local hart.
-Furthermore, the PTE update must appear in the global memory order no
-later than the explicit memory access, or any subsequent explicit memory
-access to that virtual page by the local hart. The ordering on loads and
-stores provided by FENCE instructions and the acquire/release bits on
-atomic instructions also orders the PTE updates associated with those
-loads and stores as observed by remote harts.
-+
-The PTE update is not required to be atomic with respect to the explicit
-memory access that caused the update, and the sequence is interruptible.
-However, the hart must not perform the explicit memory access before the
-PTE update is globally visible.
+Two schemes to manage the A and D bits are defined:
+
+* The _Svade_ extension: when a virtual page is accessed and the A bit is
+ clear, or is written and the D bit is clear, a page-fault exception is
+ raised.
+
+* When the Svade extension is not implemented, the following scheme applies. +
+ +
+ When a virtual page is accessed and the A bit is clear, the PTE is
+ updated to set the A bit. When the virtual page is written and the D
+ bit is clear, the PTE is updated to set the D bit. When G-stage address
+ translation is in use and is not Bare, the G-stage virtual pages may be
+ accessed or written by implicit accesses to VS-level memory management
+ data structures, such as page tables. +
+ +
+ When two-stage address translation is in use, an explicit access may
+ cause both VS-stage and G-stage PTEs to be updated. The following rules
+ apply to all PTE updates caused by explicit or implicit memory
+ accesses. +
+ +
+ The PTE update must be atomic with respect to other accesses to the
+ PTE, and must atomically perform all tablewalk checks for that leaf
+ PTE as part of, and before, conditionally updating the PTE value.
+ Updates of the A bit may be performed as a result of speculation, even
+ if the associated memory access ultimately is not performed
+ architecturally. However, updates to the D bit, resulting from an
+ explicit store, must be exact (i.e., non-speculative), and observed in
+ program order by the local hart. When two-stage address translation is
+ active, updates of the D bit in G-stage PTEs may be performed as a
+ result of speculative updates of the A bit in VS-stage PTEs. +
+ +
+ The PTE update must appear in the global memory order before the
+ memory access that caused the PTE update and before any subsequent
+ explicit memory access to that virtual page by the local hart. The
+ ordering on loads and stores provided by FENCE instructions and the
+ acquire/release bits on atomic instructions also orders the PTE updates
+ associated with those loads and stores as observed by remote harts. +
+ +
+ The PTE update is not required to be atomic with respect to the memory
+ access that caused the update and a trap may occur between the PTE
+ update and the memory access that caused the PTE update. If a trap
+ occurs then the A and/or D bit may be updated but the memory access
+ that caused the PTE update might not occur. The hart must not perform
+ the memory access that caused the PTE update before the PTE update is
+ globally visible. +
+ +
+ The page tables must be located in memory with hardware page-table
+ write access and _RsrvEventual_ PMA.
All harts in a system must employ the same PTE-update scheme as each
other.
[NOTE]
====
+The PTE updates due to memory accesses ordered-after a FENCE are not
+themselves ordered by the FENCE.
+
+Simpler implementations may order the Page Table Entry (PTE) update
+to precede all subsequent explicit memory accesses, as opposed to
+ensuring that the PTE update is precisely sequenced before subsequent
+explicit memory accesses to the associated virtual page.
+
Prior versions of this specification required PTE A bit updates to be
exact, but allowing the A bit to be updated as a result of speculation
simplifies the implementation of address translation prefetchers. System
@@ -1281,34 +1342,44 @@ standard extension, the LR/SC reservation set must lie completely within
a single base physical page (i.e., a naturally aligned 4 KiB physical-memory
region).
+On some implementations, misaligned loads, stores, and instruction
+fetches may also be decomposed into multiple accesses, some of which may
+succeed before a page-fault exception occurs. In particular, a
+portion of a misaligned store that passes the exception check may become
+visible, even if another portion fails the exception check. The same behavior
+may manifest for stores wider than XLEN bits (e.g., the FSD instruction
+in RV32D), even when the store address is naturally aligned.
+
+
[[sv32algorithm]]
==== Virtual Address Translation Process
A virtual address _va_ is translated into a physical address _pa_ as follows:
-. Let _a_ be ``satp``.__ppn__ X PAGESIZE, and let __i__= LEVELS - 1. (For Sv32, PAGESIZE=2^12^ and LEVELS=2.) The `satp` register must be
+. Let _a_ be ``satp``.__ppn__×PAGESIZE, and let __i__=LEVELS-1. (For Sv32, PAGESIZE=2^12^ and LEVELS=2.) The `satp` register must be
_active_, i.e., the effective privilege mode must be S-mode or U-mode.
-. Let _pte_ be the value of the PTE at address __a__+__va.vpn[i] X PTESIZE. (For Sv32, PTESIZE=4.) If accessing _pte_ violates a PMA or PMP check, raise an access-fault exception corresponding to the original access type.
-. If _pte.v_=0, or if _pte.r_=0 and _pte.w_=1, or if any bits or encodings that are reserved for future standard use are set within _pte_, stop and raise a page-fault exception corresponding to the original access type.
-. Otherwise, the PTE is valid. If __pte.r__=1 or __pte.x__=1, go to step 5. Otherwise, this PTE is a pointer to the next level of the page table. Let __i=i__-1. If i<0, stop and raise a page-fault exception corresponding to the original access type. Otherwise, let
-__a=pte.ppn__ X PAGESIZE and go to step 2.
+. Let _pte_ be the value of the PTE at address __a__+__va__.__vpn__[__i__]×PTESIZE. (For Sv32, PTESIZE=4.) If accessing _pte_ violates a PMA or PMP check, raise an access-fault exception corresponding to the original access type.
+. If _pte_._v_=0, or if _pte_._r_=0 and _pte_._w_=1, or if any bits or encodings that are reserved for future standard use are set within _pte_, stop and raise a page-fault exception corresponding to the original access type.
+. Otherwise, the PTE is valid. If __pte__.__r__=1 or __pte__.__x__=1, go to step 5. Otherwise, this PTE is a pointer to the next level of the page table. Let __i=i__-1. If __i__<0, stop and raise a page-fault exception corresponding to the original access type. Otherwise, let
+__a__=__pte__.__ppn__×PAGESIZE and go to step 2.
. A leaf PTE has been found. Determine if the requested memory access is
-allowed by the _pte.r_, _pte.w_, _pte.x_, and _pte.u_ bits, given the current privilege mode and the value of the SUM and MXR fields of the `mstatus` register. If not, stop and raise a page-fault exception corresponding to the original access type.
-. If _i>0_ and _pte.ppn_[i-1:0] ≠ 0, this is a misaligned superpage; stop and raise a page-fault exception corresponding to the original access type.
-. If _pte.a_=0, or if the original memory access is a store and _pte.d_=0, either raise a page-fault exception corresponding to the original access type, or:
+allowed by the _pte_._r_, _pte_._w_, _pte_._x_, and _pte_._u_ bits, given the current privilege mode and the value of the SUM and MXR fields of the `mstatus` register. If not, stop and raise a page-fault exception corresponding to the original access type.
+. If _i_>0 and _pte_._ppn_[__i__-1:0] ≠ 0, this is a misaligned superpage; stop and raise a page-fault exception corresponding to the original access type.
+. If _pte_._a_=0, or if the original memory access is a store and _pte_._d_=0:
+* If the Svade extension is implemented, stop and raise a page-fault exception corresponding to the original access type.
* If a store to _pte_ would violate a PMA or PMP check,
raise an access-fault exception corresponding to the original access
type.
* Perform the following steps atomically:
-** Compare _pte_ to the value of the PTE at address __a__+__va.vpn[i]__ X PTESIZE.
-** If the values match, set _pte.a_ to 1 and, if the
-original memory access is a store, also set _pte.d_ to 1.
-** If the comparison fails, return to step 2
+** Compare _pte_ to the value of the PTE at address __a__+__va.vpn__[__i__]×PTESIZE.
+** If the values match, set _pte_._a_ to 1 and, if the
+original memory access is a store, also set _pte_._d_ to 1.
+** If the comparison fails, return to step 2.
. The translation is successful. The translated physical address is
given as follows:
* _pa.pgoff_ = _va.pgoff_.
-* If _i_>0, then this is a superpage translation and __pa.ppn[i__-1:0] = _va.vpn[i_-1:0].
-* _pa.ppn_[LEVELS - 1:__i__] = _pte.ppn_[LEVELS - 1:__i__].
+* If _i_>0, then this is a superpage translation and __pa.ppn__[__i__-1:0] = __va.vpn__[__i__-1:0].
+* _pa.ppn_[LEVELS-1:__i__] = _pte_._ppn_[LEVELS-1:__i__].
All implicit accesses to the address-translation data structures in this
algorithm are performed using width PTESIZE.
@@ -1413,9 +1484,9 @@ the differences between the schemes.
====
We specified multiple virtual memory systems for RV64 to relieve the
tension between providing a large address space and minimizing
-address-translation cost. For many systems, of virtual-address space is
+address-translation cost. For many systems, 39 bits of virtual-address space is
ample, and so Sv39 suffices. Sv48 increases the virtual address space to
-, but increases the physical memory capacity dedicated to page tables,
+48 bits, but increases the physical memory capacity dedicated to page tables,
the latency of page-table traversals, and the size of hardware
structures that store virtual addresses. Sv57 increases the virtual
address space, page table capacity requirement, and translation latency
@@ -1613,7 +1684,7 @@ The Svnapot extension depends on Sv39.
.Page table entry encodings when __pte__.N=1
[%autowidth,float="center",align="center",cols="^,^,<,^",options="header"]
|===
-|i |_pte.ppn[i]_ |Description |_pte.napot_bits_
+|i |_pte_._ppn_[_i_] |Description |_pte_.__napot_bits__
|0 +
0 +
0 +
@@ -1647,14 +1718,14 @@ except that:
* If the encoding in _pte_ is valid according to
<<ptenapot>>, then instead of returning the original
value of _pte_, implicit reads of a NAPOT PTE return a copy
-of _pte_ in which __pte.ppn[i][pte.napot_bits__-1:0] is replaced by
-__vpn[i][pte.napot_bits__-1:0]. If the encoding in _pte_ is reserved according to
+of _pte_ in which __pte__.__ppn__[__i__][__pte__.__napot_bits__-1:0] is replaced by
+__vpn__[__i__][__pte__.__napot_bits__-1:0]. If the encoding in _pte_ is reserved according to
<<ptenapot>>, then a page-fault exception must be raised.
* Implicit reads of NAPOT page table entries may create
address-translation cache entries mapping
-_a_ + _j_ X PTESIZE to a copy of _pte_ in which _pte.ppn[i][pte.napot_bits_-1:0]
+_a_ + _j_×PTESIZE to a copy of _pte_ in which _pte_._ppn_[_i_][_pte_.__napot_bits__-1:0]
is replaced by _vpn[i][pte.napot_bits_-1:0], for any or all _j_ such that
-__j >> napot_bits__ = __vpn[i] >> napot_bits__, all for the address space identified in _satp_ as loaded by step 1.
+__j__ >> __napot_bits__ = __vpn__[__i__] >> __napot_bits__, all for the address space identified in _satp_ as loaded by step 1.
[NOTE]
====
@@ -1711,7 +1782,7 @@ __
[%autowidth,float="center",align="center",cols="^,^,<,^",options="header"]
|===
-|i |_pte.ppn[i]_ |Description |_pte.napot_bits_
+|i |_pte_._ppn_[_i_] |Description |_pte_.__napot_bits__
|0 +
0 +
0 +
@@ -1756,7 +1827,7 @@ allow system software to determine which sizes are supported.
Other sizes may remain deliberately excluded, so that PPN bits not being
used to indicate a valid NAPOT region size (e.g., the least-significant
-bit of _pte.ppn[i]_) may be repurposed for other uses in the
+bit of _pte_._ppn_[_i_]) may be repurposed for other uses in the
future.
However, in case finer-grained intermediate page size support proves not
@@ -1775,7 +1846,7 @@ associated memory pages. The encoding for the PBMT bits is captured in
The Svpbmt extension depends on Sv39.
[[pbmt]]
-.Encodings for PBMT field in Sv39, Sv48, and Sv57 PTEs. Attributes not mentioned are inherited from PMA associated with the physical address.
+.Encodings for PBMT field in Sv39, Sv48, and Sv57 PTEs.
[%autowidth,float="center",align="center",cols="^,^,<",options="header"]
|===
|Mode |Value |Requested Memory Attributes
@@ -1793,6 +1864,13 @@ Non-cacheable, non-idempotent, strongly-ordered (I/O ordering), I/O +
_Reserved for future standard use_
|===
+Implementations may override additional PMAs not explicitly listed in
+<<pbmt>>.
+For example, to be consistent with the characteristics of a typical I/O region,
+a misaligned memory access to a page with PBMT=IO might raise an exception,
+even if the underlying region were main memory and the same access would have
+succeeded for PBMT=PMA.
+
[NOTE]
====
Future extensions may provide more and/or finer-grained control over
@@ -1822,8 +1900,8 @@ accesses for the purposes of FENCE, _.aq_, and _.rl_.
If the underlying physical memory attribute for a page is main memory,
and the page has PBMT=IO, then accesses to that page obey strong channel
-0 I/O ordering rules with respect to other accesses to physical main
-memory and to other accesses to pages with PBMT=IO. However, accesses to
+0 I/O ordering rules.
+However, accesses to
such pages are considered to be _both_ I/O and main memory accesses for
the purposes of FENCE, _.aq_, and _.rl_.
@@ -1935,14 +2013,20 @@ HINVAL.GVMA uses VMIDs instead of ASIDs.
SINVAL.VMA, HINVAL.VVMA, and HINVAL.GVMA require the same permissions
and raise the same exceptions as SFENCE.VMA, HFENCE.VVMA, and
HFENCE.GVMA, respectively. In particular, an attempt to execute any of
-these instructions in U-mode always raises an illegal instruction
+these instructions in U-mode always raises an illegal-instruction
exception, and an attempt to execute SINVAL.VMA or HINVAL.GVMA in S-mode
-or HS-mode when `mstatus`.TVM=1 also raises an illegal instruction
+or HS-mode when `mstatus`.TVM=1 also raises an illegal-instruction
exception. An attempt to execute HINVAL.VVMA or HINVAL.GVMA in VS-mode
-or VU-mode, or to execute SINVAL.VMA in VU-mode, raises a virtual
-instruction exception. When `hstatus`.VTVM=1, an attempt to execute
+or VU-mode, or to execute SINVAL.VMA in VU-mode, raises a
+virtual-instruction exception. When `hstatus`.VTVM=1, an attempt to execute
SINVAL.VMA in VS-mode also raises a virtual instruction exception.
+Attempting to execute SFENCE.W.INVAL or SFENCE.INVAL.IR in U-mode
+raises an illegal-instruction exception.
+Doing so in VU-mode raises a virtual-instruction exception.
+SFENCE.W.INVAL and SFENCE.INVAL.IR are unaffected by the `mstatus`.TVM and
+`hstatus`.VTVM fields and hence are always permitted in S-mode and VS-mode.
+
[NOTE]
====
SFENCE.W.INVAL and SFENCE.INVAL.IR instructions do not need to be
@@ -1967,4 +2051,19 @@ Simpler implementations may implement SINVAL.VMA, HINVAL.VVMA, and
HINVAL.GVMA identically to SFENCE.VMA, HFENCE.VVMA, and HFENCE.GVMA,
respectively, while implementing SFENCE.W.INVAL and SFENCE.INVAL.IR
instructions as no-ops.
-==== \ No newline at end of file
+====
+
+[[sec:svadu]]
+== "Svadu" Standard Extension for Hardware Updating of A/D Bits, Version 1.0
+
+The Svadu extension adds support and CSR controls for hardware updating of PTE A/D bits.
+
+If the Svadu extension is implemented, the `menvcfg`.ADUE field is writable.
+If the hypervisor extension is additionally implemented, the `henvcfg`.ADUE
+field is also writable.
+See <<sec:menvcfg>> and <<sec:henvcfg>> for the definitions of those fields.
+
+<<translation>> defines the semantics of hardware updating of A/D bits. When
+hardware updating of A/D bits is disabled, the Svade extension, which mandates
+exceptions when A/D bits need be set, instead takes effect.
+The Svade extension is also defined in <<translation>>.
diff --git a/src/v-st-ext.adoc b/src/v-st-ext.adoc
index 88dcf8d..194e448 100644
--- a/src/v-st-ext.adoc
+++ b/src/v-st-ext.adoc
@@ -1,9 +1,6 @@
[[vector]]
== "V" Standard Extension for Vector Operations, Version 1.0
-The specification is currently hosted at
-https://github.com/riscv/riscv-v-spec.
-
[NOTE]
====
_The base vector extension is intended to provide general support for
@@ -12,3 +9,5185 @@ with later vector extensions supporting richer functionality for certain
domains._
====
+=== Introduction
+
+This document is version 1.1-draft of the RISC-V vector extension.
+
+NOTE: This version holds updates gathered after the start of the
+public review. The spec will have a final update to version 2.0 at
+time of ratification.
+
+This spec includes the complete set of currently frozen vector
+instructions. Other instructions that have been considered during
+development but are not present in this document are not included in
+the review and ratification process, and may be completely revised or
+abandoned. Section <<sec-vector-extensions>> lists the standard
+vector extensions and which instructions and element widths are
+supported by each extension.
+
+=== Implementation-defined Constant Parameters
+
+Each hart supporting a vector extension defines two parameters:
+
+. The maximum size in bits of a vector element that any operation can produce or consume, _ELEN_ {ge} 8, which
+must be a power of 2.
+. The number of bits in a single vector register, _VLEN_ {ge} ELEN, which must be a power of 2, and must be no greater than 2^16^.
+
+Standard vector extensions (Section <<sec-vector-extensions>>) and
+architecture profiles may set further constraints on _ELEN_ and _VLEN_.
+
+NOTE: Future extensions may allow ELEN {gt} VLEN by holding one
+element using bits from multiple vector registers, but this current
+proposal does not include this option.
+
+NOTE: The upper limit on VLEN allows software to know that indices
+will fit into 16 bits (largest VLMAX of 65,536 occurs for LMUL=8 and
+SEW=8 with VLEN=65,536). Any future extension beyond 64Kib per vector
+register will require new configuration instructions such that
+software using the old configuration instructions does not see greater
+vector lengths.
+
+The vector extension supports writing binary code that under certain
+constraints will execute portably on harts with different values for
+the VLEN parameter, provided the harts support the required element
+types and instructions.
+
+NOTE: Code can be written that will expose differences in
+implementation parameters.
+
+NOTE: In general, thread contexts with active vector state cannot be
+migrated during execution between harts that have any difference in
+VLEN or ELEN parameters.
+
+=== Vector Extension Programmer's Model
+
+The vector extension adds 32 vector registers, and seven unprivileged
+CSRs (`vstart`, `vxsat`, `vxrm`, `vcsr`, `vtype`, `vl`, `vlenb`) to a
+base scalar RISC-V ISA.
+
+.New vector CSRs
+[cols="2,2,2,10"]
+[%autowidth,float="center",align="center",options="header"]
+|===
+| Address | Privilege | Name | Description
+
+| 0x008 | URW | vstart | Vector start position
+| 0x009 | URW | vxsat | Fixed-Point Saturate Flag
+| 0x00A | URW | vxrm | Fixed-Point Rounding Mode
+| 0x00F | URW | vcsr | Vector control and status register
+| 0xC20 | URO | vl | Vector length
+| 0xC21 | URO | vtype | Vector data type register
+| 0xC22 | URO | vlenb | VLEN/8 (vector register length in bytes)
+|===
+
+NOTE: The four CSR numbers `0x00B`-`0x00E` are tentatively reserved
+for future vector CSRs, some of which may be mirrored into `vcsr`.
+
+==== Vector Registers
+
+The vector extension adds 32 architectural vector registers,
+`v0`-`v31`, to the base scalar RISC-V ISA.
+
+Each vector register has a fixed VLEN bits of state.
+
+==== Vector Context Status in `mstatus`
+
+A vector context status field, `VS`, is added to `mstatus[10:9]` and shadowed
+in `sstatus[10:9]`. It is defined analogously to the floating-point context
+status field, `FS`.
+
+Attempts to execute any vector instruction, or to access the vector
+CSRs, raise an illegal-instruction exception when `mstatus.VS` is
+set to Off.
+
+When `mstatus.VS` is set to Initial or Clean, executing any
+instruction that changes vector state, including the vector CSRs, will
+change `mstatus.VS` to Dirty.
+Implementations may also change `mstatus.VS` from Initial or Clean to Dirty
+at any time, even when there is no change in vector state.
+
+NOTE: Accurate setting of `mstatus.VS` is an optimization. Software
+will typically use VS to reduce context-swap overhead.
+
+If `mstatus.VS` is Dirty, `mstatus.SD` is 1;
+otherwise, `mstatus.SD` is set in accordance with existing specifications.
+
+Implementations may have a writable `misa.V` field. Analogous to the
+way in which the floating-point unit is handled, the `mstatus.VS`
+field may exist even if `misa.V` is clear.
+
+NOTE: Allowing `mstatus.VS` to exist when `misa.V` is clear enables
+vector emulation and simplifies handling of `mstatus.VS` in systems
+with writable `misa.V`.
+
+==== Vector Context Status in `vsstatus`
+
+When the hypervisor extension is present, a vector context status field, `VS`,
+is added to `vsstatus[10:9]`.
+It is defined analogously to the floating-point context status field, `FS`.
+
+When V=1, both `vsstatus.VS` and `mstatus.VS` are in effect: attempts to
+execute any vector instruction, or to access the vector CSRs, raise an
+illegal-instruction exception when either field is set to Off.
+
+When V=1 and neither `vsstatus.VS` nor `mstatus.VS` is set to Off, executing
+any instruction that changes vector state, including the vector CSRs, will
+change both `mstatus.VS` and `vsstatus.VS` to Dirty.
+Implementations may also change `mstatus.VS` or `vsstatus.VS` from Initial or
+Clean to Dirty at any time, even when there is no change in vector state.
+
+If `vsstatus.VS` is Dirty, `vsstatus.SD` is 1;
+otherwise, `vsstatus.SD` is set in accordance with existing specifications.
+
+If `mstatus.VS` is Dirty, `mstatus.SD` is 1;
+otherwise, `mstatus.SD` is set in accordance with existing specifications.
+
+For implementations with a writable `misa.V` field,
+the `vsstatus.VS` field may exist even if `misa.V` is clear.
+
+==== Vector type register, `vtype`
+
+The read-only XLEN-wide _vector_ _type_ CSR, `vtype`, provides the
+default type used to interpret the contents of the vector register
+file, and can only be updated by `vset{i}vl{i}` instructions. The
+vector type determines the organization of elements in each
+vector register, and how multiple vector registers are grouped. The
+`vtype` register also indicates how masked-off elements and elements
+past the current vector length in a vector result are handled.
+
+NOTE: Allowing updates only via the `vset{i}vl{i}` instructions
+simplifies maintenance of the `vtype` register state.
+
+The `vtype` register has five fields, `vill`, `vma`, `vta`,
+`vsew[2:0]`, and `vlmul[2:0]`. Bits `vtype[XLEN-2:8]` should be
+written with zero, and non-zero values in this field are reserved.
+
+include::images/wavedrom/vtype-format.adoc[]
+
+NOTE: A small implementation supporting ELEN=32 requires only seven
+bits of state in `vtype`: two bits for `ma` and `ta`, two bits for
+`vsew[1:0]` and three bits for `vlmul[2:0]`. The illegal value
+represented by `vill` can be internally encoded using the illegal 64-bit
+combination in `vsew[1:0]` without requiring an additional storage
+bit to hold `vill`.
+
+NOTE: Further standard and custom vector extensions may extend these
+fields to support a greater variety of data types.
+
+NOTE: The primary motivation for the `vtype` CSR is to allow the
+vector instruction set to fit into a 32-bit instruction encoding
+space. A separate `vset{i}vl{i}` instruction can be used to set `vl`
+and/or `vtype` fields before execution of a vector instruction, and
+implementations may choose to fuse these two instructions into a single
+internal vector microop. In many cases, the `vl` and `vtype` values
+can be reused across multiple instructions, reducing the static and
+dynamic instruction overhead from the `vset{i}vl{i}` instructions. It
+is anticipated that a future extended 64-bit instruction encoding
+would allow these fields to be specified statically in the instruction
+encoding.
+
+===== Vector selected element width `vsew[2:0]`
+
+The value in `vsew` sets the dynamic _selected_ _element_ _width_
+(SEW). By default, a vector register is viewed as being divided into
+VLEN/SEW elements.
+
+.vsew[2:0] (selected element width) encoding
+[cols="1,1,1,1"]
+[%autowidth,float="center",align="center",options="header"]
+|===
+3+| vsew[2:0] | SEW
+
+| 0 | 0 | 0 | 8
+| 0 | 0 | 1 | 16
+| 0 | 1 | 0 | 32
+| 0 | 1 | 1 | 64
+| 1 | X | X | Reserved
+|===
+
+NOTE: While it is anticipated the larger `vsew[2:0]` encodings
+(`100`-`111`) will be used to encode larger SEW, the encodings are
+formally _reserved_ at this point.
+
+.Example VLEN = 128 bits
+[cols=">,>"]
+[%autowidth,float="center",align="center",options="header"]
+|===
+| SEW | Elements per vector register
+
+| 64 | 2
+| 32 | 4
+| 16 | 8
+| 8 | 16
+|===
+
+The supported element width may vary with LMUL.
+
+NOTE: The current set of standard vector extensions does not vary
+supported element width with LMUL. Some future extensions may support
+larger SEWs only when bits from multiple vector registers are combined
+using LMUL. In this case, software that relies on large SEW should
+attempt to use the largest LMUL, and hence the fewest vector register
+groups, to increase the number of implementations on which the code
+will run. The `vill` bit in `vtype` should be checked after setting
+`vtype` to see if the configuration is supported, and an alternate
+code path should be provided if it is not. Alternatively, a profile
+can mandate the minimum SEW at each LMUL setting.
+
+===== Vector Register Grouping (`vlmul[2:0]`)
+
+Multiple vector registers can be grouped together, so that a single
+vector instruction can operate on multiple vector registers. The term
+_vector_ _register_ _group_ is used herein to refer to one or more
+vector registers used as a single operand to a vector instruction.
+Vector register groups can be used to provide greater execution
+efficiency for longer application vectors, but the main reason for
+their inclusion is to allow double-width or larger elements to be
+operated on with the same vector length as single-width elements. The
+vector length multiplier, _LMUL_, when greater than 1, represents the
+default number of vector registers that are combined to form a vector
+register group. Implementations must support LMUL integer values of
+1, 2, 4, and 8.
+
+
+NOTE: The vector architecture includes instructions that take multiple
+source and destination vector operands with different element widths,
+but the same number of elements. The effective LMUL (EMUL) of each
+vector operand is determined by the number of registers required to
+hold the elements. For example, for a widening add operation, such as
+adding 32-bit values to produce 64-bit results, a double-width result
+requires twice the LMUL of the single-width inputs.
+
+LMUL can also be a fractional value, reducing the number of bits used
+in a single vector register. Fractional LMUL is used to increase the
+number of effective usable vector register groups when operating on
+mixed-width values.
+
+NOTE: With only integer LMUL values, a loop operating on a range of
+sizes would have to allocate at least one whole vector register
+(LMUL=1) for the narrowest data type and then would consume multiple
+vector registers (LMUL>1) to form a vector register group for each
+wider vector operand. This can limit the number of vector register groups
+available. With fractional LMUL, the widest values need occupy only a
+single vector register while narrower values can occupy a fraction of
+a single vector register, allowing all 32 architectural vector
+register names to be used for different values in a vector loop even
+when handling mixed-width values. Fractional LMUL implies portions of
+vector registers are unused, but in some cases, having more shorter
+register-resident vectors improves efficiency relative to fewer longer
+register-resident vectors.
+
+Implementations must provide fractional LMUL settings that allow the
+narrowest supported type to occupy a fraction of a vector register
+corresponding to the ratio of the narrowest supported type's width to
+that of the largest supported type's width. In general, the
+requirement is to support LMUL {ge} SEW~MIN~/ELEN, where SEW~MIN~ is
+the narrowest supported SEW value and ELEN is the widest supported SEW
+value. In the standard extensions, SEW~MIN~=8. For
+standard vector extensions with ELEN=32, fractional LMULs of 1/2 and
+1/4 must be supported. For standard vector extensions with ELEN=64,
+fractional LMULs of 1/2, 1/4, and 1/8 must be supported.
+
+NOTE: When LMUL < SEW~MIN~/ELEN, there is no guarantee
+an implementation would have enough bits in the fractional vector
+register to store at least one element, as VLEN=ELEN is a
+valid implementation choice. For example, with VLEN=ELEN=32,
+and SEW~MIN~=8, an LMUL of 1/8 would only provide four bits of
+storage in a vector register.
+
+For a given supported fractional LMUL setting, implementations must support
+SEW settings between SEW~MIN~ and LMUL * ELEN, inclusive.
+
+The use of `vtype` encodings with LMUL < SEW~MIN~/ELEN is
+__reserved__, but implementations can set `vill` if they do not
+support these configurations.
+
+NOTE: Requiring all implementations to set `vill` in this case would
+prohibit future use of this case in an extension, so to allow for a
+future definition of LMUL<SEW~MIN~/ELEN behavior, we
+consider the use of this case to be __reserved__.
+
+NOTE: It is recommended that assemblers provide a warning (not an
+error) if a `vsetvli` instruction attempts to write an LMUL < SEW~MIN~/ELEN.
+
+LMUL is set by the signed `vlmul` field in `vtype` (i.e., LMUL =
+2^`vlmul[2:0]`^).
+
+The derived value VLMAX = LMUL*VLEN/SEW represents the maximum number
+of elements that can be operated on with a single vector instruction
+given the current SEW and LMUL settings as shown in the table below.
+
+[cols="1,1,1,2,2,5,5"]
+[%autowidth,float="center",align="center",options="header"]
+|===
+ 3+| vlmul[2:0] | LMUL | #groups | VLMAX | Registers grouped with register __n__
+
+| 1 | 0 | 0 | - | - | - | reserved
+| 1 | 0 | 1 | 1/8| 32 | VLEN/SEW/8 | `v` __n__ (single register in group)
+| 1 | 1 | 0 | 1/4| 32 | VLEN/SEW/4 | `v` __n__ (single register in group)
+| 1 | 1 | 1 | 1/2| 32 | VLEN/SEW/2 | `v` __n__ (single register in group)
+| 0 | 0 | 0 | 1 | 32 | VLEN/SEW | `v` __n__ (single register in group)
+| 0 | 0 | 1 | 2 | 16 | 2*VLEN/SEW | `v` __n__, `v` __n__+1
+| 0 | 1 | 0 | 4 | 8 | 4*VLEN/SEW | `v` __n__, ..., `v` __n__+3
+| 0 | 1 | 1 | 8 | 4 | 8*VLEN/SEW | `v` __n__, ..., `v` __n__+7
+|===
+
+When LMUL=2, the vector register group contains vector register `v`
+__n__ and vector register `v` __n__+1, providing twice the vector
+length in bits. Instructions specifying an LMUL=2 vector register group
+with an odd-numbered vector register are reserved.
+
+When LMUL=4, the vector register group contains four vector registers,
+and instructions specifying an LMUL=4 vector register group using vector
+register numbers that are not multiples of four are reserved.
+
+When LMUL=8, the vector register group contains eight vector
+registers, and instructions specifying an LMUL=8 vector register group
+using register numbers that are not multiples of eight are reserved.
+
+Mask registers are always contained in a single vector register,
+regardless of LMUL.
+
+[[sec-agnostic]]
+===== Vector Tail Agnostic and Vector Mask Agnostic `vta` and `vma`
+
+These two bits modify the behavior of destination tail elements and
+destination inactive masked-off elements respectively during the
+execution of vector instructions. The tail and inactive sets contain
+element positions that are not receiving new results during a vector
+operation, as defined in Section <<sec-inactive-defs>>.
+
+All systems must support all four options:
+
+[cols="1,1,3,3"]
+[%autowidth,float="center",align="center",options="header"]
+|===
+| `vta` | `vma` | Tail Elements | Inactive Elements
+
+| 0 | 0 | undisturbed | undisturbed
+| 0 | 1 | undisturbed | agnostic
+| 1 | 0 | agnostic | undisturbed
+| 1 | 1 | agnostic | agnostic
+|===
+
+Mask destination tail elements are always treated as tail-agnostic,
+regardless of the setting of `vta`.
+
+When a set is marked undisturbed, the corresponding set of destination
+elements in a vector register group retain the value they previously
+held.
+
+When a set is marked agnostic, the corresponding set of destination
+elements in any vector destination operand can either retain the value
+they previously held, or be overwritten with 1s. Within a single vector
+instruction, each destination element can be either left undisturbed
+or overwritten with 1s, in any combination, and the pattern of
+undisturbed or overwritten with 1s is not required to be deterministic
+when the instruction is executed with the same inputs.
+
+NOTE: The agnostic policy was added to accommodate machines with
+vector register renaming. With an undisturbed policy, all elements
+would have to be read from the old physical destination vector
+register to be copied into the new physical destination vector
+register. This causes an inefficiency when these inactive or tail
+values are not required for subsequent calculations.
+
+NOTE: The value of all 1s instead of all 0s was chosen for the
+overwrite value to discourage software developers from depending on
+the value written.
+
+NOTE: A simple in-order implementation can ignore the settings and
+simply execute all vector instructions using the undisturbed
+policy. The `vta` and `vma` state bits must still be provided in
+`vtype` for compatibility and to support thread migration.
+
+NOTE: An out-of-order implementation can choose to implement
+tail-agnostic + mask-agnostic using tail-agnostic + mask-undisturbed
+to reduce implementation complexity.
+
+NOTE: The definition of agnostic result policy is left loose to
+accommodate migrating application threads between harts on a small
+in-order core (which probably leaves agnostic regions undisturbed) and
+harts on a larger out-of-order core with register renaming (which
+probably overwrites agnostic elements with 1s). As it might be
+necessary to restart in the middle, we allow arbitrary mixing of
+agnostic policies within a single vector instruction. This allowed
+mixing of policies also enables implementations that might change
+policies for different granules of a vector register, for example,
+using undisturbed within a granule that is actively operated on but
+renaming to all 1s for granules in the tail.
+
+In addition, except for mask load instructions, any element in the
+tail of a mask result can also be written with the value the
+mask-producing operation would have calculated with `vl`=VLMAX.
+Furthermore, for mask-logical instructions and `vmsbf.m`, `vmsif.m`,
+`vmsof.m` mask-manipulation instructions, any element in the tail of
+the result can be written with the value the mask-producing operation
+would have calculated with `vl`=VLEN, SEW=8, and LMUL=8 (i.e., all
+bits of the mask register can be overwritten).
+
+NOTE: Mask tails are always treated as agnostic to reduce complexity
+of managing mask data, which can be written at bit granularity. There
+appears to be little software need to support tail-undisturbed for
+mask register values. Allowing mask-generating instructions to write
+back the result of the instruction avoids the need for logic to mask
+out the tail, except mask loads cannot write memory values to
+destination mask tails as this would imply accessing memory past
+software intent.
+
+The assembly syntax adds two mandatory flags to the `vsetvli` instruction:
+
+----
+ ta # Tail agnostic
+ tu # Tail undisturbed
+ ma # Mask agnostic
+ mu # Mask undisturbed
+
+ vsetvli t0, a0, e32, m4, ta, ma # Tail agnostic, mask agnostic
+ vsetvli t0, a0, e32, m4, tu, ma # Tail undisturbed, mask agnostic
+ vsetvli t0, a0, e32, m4, ta, mu # Tail agnostic, mask undisturbed
+ vsetvli t0, a0, e32, m4, tu, mu # Tail undisturbed, mask undisturbed
+----
+
+NOTE: Prior to v0.9, when these flags were not specified on a
+`vsetvli`, they defaulted to mask-undisturbed/tail-undisturbed. The
+use of `vsetvli` without these flags is deprecated, however, and
+specifying a flag setting is now mandatory. The default should
+perhaps be tail-agnostic/mask-agnostic, so software has to specify
+when it cares about the non-participating elements, but given the
+historical meaning of the instruction prior to introduction of these
+flags, it was decided to always require them in future assembly code.
+
+===== Vector Type Illegal `vill`
+
+The `vill` bit is used to encode that a previous `vset{i}vl{i}`
+instruction attempted to write an unsupported value to `vtype`.
+
+NOTE: The `vill` bit is held in bit XLEN-1 of the CSR to support
+checking for illegal values with a branch on the sign bit.
+
+If the `vill` bit is set, then any attempt to execute a vector instruction
+that depends upon `vtype` will raise an illegal-instruction exception.
+
+NOTE: `vset{i}vl{i}` and whole register loads and stores do not depend
+upon `vtype`.
+
+When the `vill` bit is set, the other XLEN-1 bits in `vtype` shall be
+zero.
+
+==== Vector Length Register `vl`
+
+The _XLEN_-bit-wide read-only `vl` CSR can only be updated by the
+`vset{i}vl{i}` instructions, and the _fault-only-first_ vector load
+instruction variants.
+
+The `vl` register holds an unsigned integer specifying the number of
+elements to be updated with results from a vector instruction, as
+further detailed in Section <<sec-inactive-defs>>.
+
+NOTE: The number of bits implemented in `vl` depends on the
+implementation's maximum vector length of the smallest supported
+type. The smallest vector implementation with VLEN=32 and supporting
+SEW=8 would need at least six bits in `vl` to hold the values 0-32
+(VLEN=32, with LMUL=8 and SEW=8, yields VLMAX=32).
+
+==== Vector Byte Length `vlenb`
+
+The _XLEN_-bit-wide read-only CSR `vlenb` holds the value VLEN/8,
+i.e., the vector register length in bytes.
+
+NOTE: The value in `vlenb` is a design-time constant in any
+implementation.
+
+NOTE: Without this CSR, several instructions are needed to calculate
+VLEN in bytes, and the code has to disturb current `vl` and `vtype`
+settings, which requires them to be saved and restored.
+
+==== Vector Start Index CSR `vstart`
+
+The _XLEN_-bit-wide read-write `vstart` CSR specifies the index of the
+first element to be executed by a vector instruction, as described in
+Section <<sec-inactive-defs>>.
+
+Normally, `vstart` is only written by hardware on a trap on a vector
+instruction, with the `vstart` value representing the element on which
+the trap was taken (either a synchronous exception or an asynchronous
+interrupt), and at which execution should resume after a resumable
+trap is handled.
+
+All vector instructions are defined to begin execution with the
+element number given in the `vstart` CSR, leaving earlier elements in
+the destination vector undisturbed, and to reset the `vstart` CSR to
+zero at the end of execution.
+
+NOTE: All vector instructions, including `vset{i}vl{i}`, reset the `vstart`
+CSR to zero.
+
+`vstart` is not modified by vector instructions that raise illegal-instruction
+exceptions.
+
+The `vstart` CSR is defined to have only enough writable bits to hold
+the largest element index (one less than the maximum VLMAX).
+
+NOTE: The maximum vector length is obtained with the largest LMUL
+setting (8) and the smallest SEW setting (8), so VLMAX_max = 8*VLEN/8 = VLEN. For example, for VLEN=256, `vstart` would have 8 bits to
+represent indices from 0 through 255.
+
+The use of `vstart` values greater than the largest element index for
+the current `vtype` setting is reserved.
+
+NOTE: It is recommended that implementations trap if `vstart` is out
+of bounds. It is not required to trap, as a possible future use of
+upper `vstart` bits is to store imprecise trap information.
+
+The `vstart` CSR is writable by unprivileged code, but non-zero
+`vstart` values may cause vector instructions to run substantially
+slower on some implementations, so `vstart` should not be used by
+application programmers. A few vector instructions cannot be
+executed with a non-zero `vstart` value and will raise an
+illegal-instruction exception as defined below.
+
+NOTE: Making `vstart` visible to unprivileged code supports user-level
+threading libraries.
+
+Implementations are permitted to raise illegal-instruction exceptions when
+attempting to execute a vector instruction with a value of `vstart` that the
+implementation can never produce when executing that same instruction with
+the same `vtype` setting.
+
+NOTE: For example, some implementations will never take interrupts during
+execution of a vector arithmetic instruction, instead waiting until the
+instruction completes to take the interrupt. Such implementations are
+permitted to raise an illegal-instruction exception when attempting to execute
+a vector arithmetic instruction when `vstart` is nonzero.
+
+NOTE: When migrating a software thread between two harts with
+different microarchitectures, the `vstart` value might not be
+supported by the new hart microarchitecture. The runtime on the
+receiving hart might then have to emulate instruction execution up to the
+next supported `vstart` element position. Alternatively, migration events
+can be constrained to only occur at mutually supported `vstart`
+locations.
+
+==== Vector Fixed-Point Rounding Mode Register `vxrm`
+
+The vector fixed-point rounding-mode register holds a two-bit
+read-write rounding-mode field in the least-significant bits
+(`vxrm[1:0]`). The upper bits, `vxrm[XLEN-1:2]`, should be written as
+zeros.
+
+The vector fixed-point rounding-mode is given a separate CSR address
+to allow independent access, but is also reflected as a field in
+`vcsr`.
+
+NOTE: A new rounding mode can be set while saving the original
+rounding mode using a single `csrwi` instruction.
+
+The fixed-point rounding algorithm is specified as follows.
+Suppose the pre-rounding result is `v`, and `d` bits of that result are to be
+rounded off.
+Then the rounded result is `(v >> d) + r`, where `r` depends on the rounding
+mode as specified in the following table.
+
+.vxrm encoding
+//[cols="1,1,4,10,5"]
+[%autowidth,float="center",align="center",cols="<,<,<,<,<",options="header"]
+|===
+2+| `vxrm[1:0]` | Abbreviation | Rounding Mode | Rounding increment, `r`
+
+| 0 | 0 | rnu | round-to-nearest-up (add +0.5 LSB) | `v[d-1]`
+| 0 | 1 | rne | round-to-nearest-even | `v[d-1] & (v[d-2:0]{ne}0 \| v[d])`
+| 1 | 0 | rdn | round-down (truncate) | `0`
+| 1 | 1 | rod | round-to-odd (OR bits into LSB, aka "jam") | `!v[d] & v[d-1:0]{ne}0`
+|===
+
+The rounding functions:
+----
+roundoff_unsigned(v, d) = (unsigned(v) >> d) + r
+roundoff_signed(v, d) = (signed(v) >> d) + r
+----
+are used to represent this operation in the instruction descriptions below.
+
+==== Vector Fixed-Point Saturation Flag `vxsat`
+
+The `vxsat` CSR has a single read-write least-significant bit
+(`vxsat[0]`) that indicates if a fixed-point instruction has had to
+saturate an output value to fit into a destination format.
+Bits `vxsat[XLEN-1:1]` should be written as zeros.
+
+The `vxsat` bit is mirrored in `vcsr`.
+
+==== Vector Control and Status Register `vcsr`
+
+The `vxrm` and `vxsat` separate CSRs can also be accessed via fields
+in the _XLEN_-bit-wide vector control and status CSR, `vcsr`.
+
+.vcsr layout
+[cols=">2,4,10"]
+[%autowidth,float="center",align="center",options="header"]
+|===
+| Bits | Name | Description
+
+| XLEN-1:3 | | Reserved
+| 2:1 | vxrm[1:0] | Fixed-point rounding mode
+| 0 | vxsat | Fixed-point accrued saturation flag
+|===
+
+==== State of Vector Extension at Reset
+
+The vector extension must have a consistent state at reset. In
+particular, `vtype` and `vl` must have values that can be read and
+then restored with a single `vsetvl` instruction.
+
+NOTE: It is recommended that at reset, `vtype.vill` is set, the
+remaining bits in `vtype` are zero, and `vl` is set to zero.
+
+The `vstart`, `vxrm`, `vxsat` CSRs can have arbitrary values at reset.
+
+NOTE: Most uses of the vector unit will require an initial `vset{i}vl{i}`,
+which will reset `vstart`. The `vxrm` and `vxsat` fields should be
+reset explicitly in software before use.
+
+The vector registers can have arbitrary values at reset.
+
+=== Mapping of Vector Elements to Vector Register State
+
+The following diagrams illustrate how different width elements are
+packed into the bytes of a vector register depending on the current
+SEW and LMUL settings, as well as implementation VLEN. Elements are
+packed into each vector register with the least-significant byte in
+the lowest-numbered bits.
+
+The mapping was chosen to provide the simplest and most portable model
+for software, but might appear to incur large wiring cost for wider
+vector datapaths on certain operations. The vector instruction set
+was expressly designed to support implementations that internally
+rearrange vector data for different SEW to reduce datapath wiring
+costs, while externally preserving the simple software model.
+
+NOTE: For example, microarchitectures can track the EEW with which a
+vector register was written, and then insert additional scrambling
+operations to rearrange data if the register is accessed with a
+different EEW.
+
+==== Mapping for LMUL = 1
+
+When LMUL=1, elements are simply packed in order from the
+least-significant to most-significant bits of the vector register.
+
+NOTE: To increase readability, vector register layouts are drawn with
+bytes ordered from right to left with increasing byte address. Bits
+within an element are numbered in a little-endian format with
+increasing bit index from right to left corresponding to increasing
+magnitude.
+
+----
+LMUL=1 examples.
+
+The element index is given in hexadecimal and is shown placed at the
+least-significant byte of the stored element.
+
+
+ VLEN=32b
+
+ Byte 3 2 1 0
+
+ SEW=8b 3 2 1 0
+ SEW=16b 1 0
+ SEW=32b 0
+
+ VLEN=64b
+
+ Byte 7 6 5 4 3 2 1 0
+
+ SEW=8b 7 6 5 4 3 2 1 0
+ SEW=16b 3 2 1 0
+ SEW=32b 1 0
+ SEW=64b 0
+
+ VLEN=128b
+
+ Byte F E D C B A 9 8 7 6 5 4 3 2 1 0
+
+ SEW=8b F E D C B A 9 8 7 6 5 4 3 2 1 0
+ SEW=16b 7 6 5 4 3 2 1 0
+ SEW=32b 3 2 1 0
+ SEW=64b 1 0
+
+ VLEN=256b
+
+ Byte 1F1E1D1C1B1A19181716151413121110 F E D C B A 9 8 7 6 5 4 3 2 1 0
+
+ SEW=8b 1F1E1D1C1B1A19181716151413121110 F E D C B A 9 8 7 6 5 4 3 2 1 0
+ SEW=16b F E D C B A 9 8 7 6 5 4 3 2 1 0
+ SEW=32b 7 6 5 4 3 2 1 0
+ SEW=64b 3 2 1 0
+----
+
+==== Mapping for LMUL < 1
+
+When LMUL < 1, only the first LMUL*VLEN/SEW elements in the vector
+register are used. The remaining space in the vector register is
+treated as part of the tail, and hence must obey the vta setting.
+
+----
+ Example, VLEN=128b, LMUL=1/4
+
+ Byte F E D C B A 9 8 7 6 5 4 3 2 1 0
+
+ SEW=8b - - - - - - - - - - - - 3 2 1 0
+ SEW=16b - - - - - - 1 0
+ SEW=32b - - - 0
+----
+
+==== Mapping for LMUL > 1
+
+When vector registers are grouped, the elements of the vector register
+group are packed contiguously in element order beginning with the
+lowest-numbered vector register and moving to the
+next-highest-numbered vector register in the group once each vector
+register is filled.
+
+----
+ LMUL > 1 examples
+
+ VLEN=32b, SEW=8b, LMUL=2
+
+ Byte 3 2 1 0
+ v2*n 3 2 1 0
+ v2*n+1 7 6 5 4
+
+ VLEN=32b, SEW=16b, LMUL=2
+
+ Byte 3 2 1 0
+ v2*n 1 0
+ v2*n+1 3 2
+
+ VLEN=32b, SEW=16b, LMUL=4
+
+ Byte 3 2 1 0
+ v4*n 1 0
+ v4*n+1 3 2
+ v4*n+2 5 4
+ v4*n+3 7 6
+
+ VLEN=32b, SEW=32b, LMUL=4
+
+ Byte 3 2 1 0
+ v4*n 0
+ v4*n+1 1
+ v4*n+2 2
+ v4*n+3 3
+
+ VLEN=64b, SEW=32b, LMUL=2
+
+ Byte 7 6 5 4 3 2 1 0
+ v2*n 1 0
+ v2*n+1 3 2
+
+ VLEN=64b, SEW=32b, LMUL=4
+
+ Byte 7 6 5 4 3 2 1 0
+ v4*n 1 0
+ v4*n+1 3 2
+ v4*n+2 5 4
+ v4*n+3 7 6
+
+ VLEN=128b, SEW=32b, LMUL=2
+
+ Byte F E D C B A 9 8 7 6 5 4 3 2 1 0
+ v2*n 3 2 1 0
+ v2*n+1 7 6 5 4
+
+ VLEN=128b, SEW=32b, LMUL=4
+
+ Byte F E D C B A 9 8 7 6 5 4 3 2 1 0
+ v4*n 3 2 1 0
+ v4*n+1 7 6 5 4
+ v4*n+2 B A 9 8
+ v4*n+3 F E D C
+----
+
+[[sec-mapping-mixed]]
+==== Mapping across Mixed-Width Operations
+
+The vector ISA is designed to support mixed-width operations without
+requiring additional explicit rearrangement instructions. The
+recommended software strategy when operating on multiple vectors with
+different precision values is to modify `vtype` dynamically to keep
+SEW/LMUL constant (and hence VLMAX constant).
+
+The following example shows four different packed element widths (8b,
+16b, 32b, 64b) in a VLEN=128b implementation. The vector register
+grouping factor (LMUL) is increased by the relative element size such
+that each group can hold the same number of vector elements (VLMAX=8
+in this example) to simplify stripmining code.
+
+----
+Example VLEN=128b, with SEW/LMUL=16
+
+Byte F E D C B A 9 8 7 6 5 4 3 2 1 0
+vn - - - - - - - - 7 6 5 4 3 2 1 0 SEW=8b, LMUL=1/2
+
+vn 7 6 5 4 3 2 1 0 SEW=16b, LMUL=1
+
+v2*n 3 2 1 0 SEW=32b, LMUL=2
+v2*n+1 7 6 5 4
+
+v4*n 1 0 SEW=64b, LMUL=4
+v4*n+1 3 2
+v4*n+2 5 4
+v4*n+3 7 6
+----
+
+The following table shows each possible constant SEW/LMUL operating
+point for loops with mixed-width operations. Each column represents a
+constant SEW/LMUL operating point. Entries in the table are the LMUL
+values that yield that column's SEW/LMUL value for the datawidth on
+that row. In each column, an LMUL setting for a datawidth indicates
+that it can be aligned with the other datawidths in the same column
+that also have an LMUL setting, such that all have the same VLMAX.
+
+|===
+| 7+^| SEW/LMUL
+| | 1 | 2 | 4 | 8 | 16 | 32 | 64
+
+| SEW= 8 | 8 | 4 | 2 | 1 | 1/2 | 1/4 | 1/8
+| SEW= 16 | | 8 | 4 | 2 | 1 | 1/2 | 1/4
+| SEW= 32 | | | 8 | 4 | 2 | 1 | 1/2
+| SEW= 64 | | | | 8 | 4 | 2 | 1
+|===
+
+Larger LMUL settings can also be used to simply increase vector length to
+reduce instruction fetch and dispatch overheads in cases where fewer
+vector register groups are needed.
+
+[[sec-mask-register-layout]]
+==== Mask Register Layout
+
+A vector mask occupies only one vector register regardless of SEW and
+LMUL.
+
+Each element is allocated a single mask bit in a mask vector register.
+The mask bit for element _i_ is located in bit _i_ of the mask
+register, independent of SEW or LMUL.
+
+=== Vector Instruction Formats
+
+The instructions in the vector extension fit under two existing major
+opcodes (LOAD-FP and STORE-FP) and one new major opcode (OP-V).
+
+Vector loads and stores are encoded within the scalar floating-point
+load and store major opcodes (LOAD-FP/STORE-FP). The vector load and
+store encodings repurpose a portion of the standard scalar
+floating-point load/store 12-bit immediate field to provide further
+vector instruction encoding, with bit 25 holding the standard vector
+mask bit (see <<sec-vector-mask-encoding>>).
+
+include::images/wavedrom/vmem-format.adoc[]
+
+include::images/wavedrom/valu-format.adoc[]
+
+include::images/wavedrom/vcfg-format.adoc[]
+
+Vector instructions can have scalar or vector source operands and
+produce scalar or vector results, and most vector instructions can be
+performed either unconditionally or conditionally under a mask.
+
+Vector loads and stores move bit patterns between vector register
+elements and memory. Vector arithmetic instructions operate on values
+held in vector register elements.
+
+==== Scalar Operands
+
+Scalar operands can be immediates, or taken from the `x` registers,
+the `f` registers, or element 0 of a vector register. Scalar results
+are written to an `x` or `f` register or to element 0 of a vector
+register. Any vector register can be used to hold a scalar regardless
+of the current LMUL setting.
+
+NOTE: Zfinx ("F in X") is a new ISA extension where
+floating-point instructions take their arguments from the integer
+register file. The vector extension is also compatible with Zfinx,
+where the Zfinx vector extension has vector-scalar floating-point
+instructions taking their scalar argument from the `x` registers.
+
+NOTE: We considered but did not pursue overlaying the `f` registers on
+`v` registers. The adopted approach reduces vector register pressure,
+avoids interactions with the standard calling convention, simplifies
+high-performance scalar floating-point design, and provides
+compatibility with the Zfinx ISA option. Overlaying `f` with `v`
+would provide the advantage of lowering the number of state bits in
+some implementations, but complicates high-performance designs and
+would prevent compatibility with the Zfinx ISA option.
+
+[[sec-vec-operands]]
+==== Vector Operands
+
+Each vector operand has an _effective_ _element_ _width_ (EEW) and an
+_effective_ LMUL (EMUL) that is used to determine the size and
+location of all the elements within a vector register group. By
+default, for most operands of most instructions, EEW=SEW and
+EMUL=LMUL.
+
+Some vector instructions have source and destination vector operands
+with the same number of elements but different widths, so that EEW and
+EMUL differ from SEW and LMUL respectively but EEW/EMUL = SEW/LMUL.
+For example, most widening arithmetic instructions have a source group
+with EEW=SEW and EMUL=LMUL but have a destination group with EEW=2*SEW and
+EMUL=2*LMUL. Narrowing instructions have a source operand that has
+EEW=2*SEW and EMUL=2*LMUL but with a destination where EEW=SEW and EMUL=LMUL.
+
+Vector operands or results may occupy one or more vector registers
+depending on EMUL, but are always specified using the lowest-numbered
+vector register in the group. Using other than the lowest-numbered
+vector register to specify a vector register group is a reserved
+encoding.
+
+A vector register cannot be used to provide source operands with more
+than one EEW for a single instruction. A mask register source is
+considered to have EEW=1 for this constraint. An encoding that would
+result in the same vector register being read with two or more
+different EEWs, including when the vector register appears at
+different positions within two or more vector register groups, is
+reserved.
+
+NOTE: In practice, there is no software benefit to reading the same
+register with different EEW in the same instruction, and this
+constraint reduces complexity for implementations that internally
+rearrange data dependent on EEW.
+
+A destination vector register group can overlap a source vector register
+group only if one of the following holds:
+
+- The destination EEW equals the source EEW.
+- The destination EEW is smaller than the source EEW and the overlap is in
+ the lowest-numbered part of the source register group (e.g., when LMUL=1,
+ `vnsrl.wi v0, v0, 3` is legal, but a destination of `v1` is not).
+- The destination EEW is greater than the source EEW, the source EMUL is
+ at least 1, and the overlap is in the highest-numbered part of the
+ destination register group (e.g., when LMUL=8, `vzext.vf4 v0, v6` is legal,
+ but a source of `v0`, `v2`, or `v4` is not).
+
+For the purpose of determining register group overlap constraints,
+mask elements have EEW=1.
+
+NOTE: The overlap constraints are designed to support resumable
+exceptions in machines without register renaming.
+
+Any instruction encoding that violates the overlap constraints is reserved.
+
+When source and destination registers overlap and have different EEW, the
+instruction is mask- and tail-agnostic, regardless of the setting of the
+`vta` and `vma` bits in `vtype`.
+
+The largest vector register group used by an instruction cannot be
+greater than 8 vector registers (i.e., EMUL{le}8), and if a vector
+instruction would require greater than 8 vector registers in a group,
+the instruction encoding is reserved. For example, a widening
+operation that produces a widened vector register group result when
+LMUL=8 is reserved as this would imply a result EMUL=16.
+
+Widened scalar values, e.g., input and output to a widening reduction
+operation, are held in the first element of a vector register and
+have EMUL=1.
+
+==== Vector Masking
+
+Masking is supported on many vector instructions. Element operations
+that are masked off (inactive) never generate exceptions. The
+destination vector register elements corresponding to masked-off
+elements are handled with either a mask-undisturbed or mask-agnostic
+policy depending on the setting of the `vma` bit in `vtype` (Section
+<<sec-agnostic>>).
+
+The mask value used to control execution of a masked vector
+instruction is always supplied by vector register `v0`.
+
+NOTE: Masks are held in vector registers, rather than in a separate mask
+register file, to reduce total architectural state and to simplify the ISA.
+
+NOTE: Future vector extensions may provide longer instruction
+encodings with space for a full mask register specifier.
+
+The destination vector register group for a masked vector instruction
+cannot overlap the source mask register (`v0`), unless the destination
+vector register is being written with a mask value (e.g., compares)
+or the scalar result of a reduction. Instruction encodings that violate
+this constraint are reserved.
+
+NOTE: This constraint supports restart with a non-zero `vstart` value.
+
+Other vector registers can be used to hold working mask values, and
+mask vector logical operations are provided to perform predicate
+calculations. [[sec-mask-vector-logical]]
+
+As specified in Section <<sec-agnostic>>, mask destination values are
+always treated as tail-agnostic, regardless of the setting of `vta`.
+
+[[sec-vector-mask-encoding]]
+===== Mask Encoding
+
+Where available, masking is encoded in a single-bit `vm` field in the
+ instruction (`inst[25]`).
+
+[cols="1,15"]
+|===
+| vm | Description
+
+| 0 | vector result, only where v0.mask[i] = 1
+| 1 | unmasked
+|===
+
+Vector masking is represented in assembler code as another vector
+operand, with `.t` indicating that the operation occurs when
+`v0.mask[i]` is `1` (`t` for "true"). If no masking operand is
+specified, unmasked vector execution (`vm=1`) is assumed.
+
+----
+ vop.v* v1, v2, v3, v0.t # enabled where v0.mask[i]=1, vm=0
+ vop.v* v1, v2, v3 # unmasked vector operation, vm=1
+----
+
+NOTE: Even though the current vector extensions only support one vector
+mask register `v0` and only the true form of predication, the assembly
+syntax writes it out in full to be compatible with future extensions
+that might add a mask register specifier and support both true and
+complement mask values. The `.t` suffix on the masking operand also helps
+to visually encode the use of a mask.
+
+NOTE: The `.mask` suffix is not part of the assembly syntax.
+We only append it in contexts where a mask vector is subscripted,
+e.g., `v0.mask[i]`.
+
+[[sec-inactive-defs]]
+==== Prestart, Active, Inactive, Body, and Tail Element Definitions
+
+The destination element indices operated on during a vector
+instruction's execution can be divided into three disjoint subsets.
+
+* The _prestart_ elements are those whose element index is less than the
+initial value in the `vstart` register. The prestart elements do not
+raise exceptions and do not update the destination vector register.
+
+* The _body_ elements are those whose element index is greater than or equal
+to the initial value in the `vstart` register, and less than the current
+vector length setting in `vl`. The body can be split into two disjoint subsets:
+
+** The _active_ elements during a vector instruction's execution are the
+elements within the body and where the current mask is enabled at that element
+position. The active elements can raise exceptions and update the destination
+vector register group.
+
+** The _inactive_ elements are the elements within the body
+but where the current mask is disabled at that element
+position. The inactive elements do not raise exceptions and do not
+update any destination vector register group unless masked agnostic is
+specified (`vtype.vma`=1), in which case inactive elements may be
+overwritten with 1s.
+
+* The _tail_ elements during a vector instruction's execution are the
+elements past the current vector length setting specified in `vl`.
+The tail elements do not raise exceptions, and do not update any
+destination vector register group unless tail agnostic is specified
+(`vtype.vta`=1), in which case tail elements may be overwritten with
+1s, or with the result of the instruction in the case of
+mask-producing instructions except for mask loads. When LMUL < 1, the
+tail includes the elements past VLMAX that are held in the same vector
+register.
+
+----
+ for element index x
+ prestart(x) = (0 <= x < vstart)
+ body(x) = (vstart <= x < vl)
+ tail(x) = (vl <= x < max(VLMAX,VLEN/SEW))
+ mask(x) = unmasked || v0.mask[x] == 1
+ active(x) = body(x) && mask(x)
+ inactive(x) = body(x) && !mask(x)
+----
+
+When `vstart` {ge} `vl`, there are no body elements, and no elements
+are updated in any destination vector register group, including that
+no tail elements are updated with agnostic values.
+
+NOTE: As a consequence, when `vl`=0, no elements, including agnostic
+elements, are updated in the destination vector register group
+regardless of `vstart`.
+
+Instructions that write an `x` register or `f` register
+do so even when `vstart` {ge} `vl`, including when `vl`=0.
+
+NOTE: Some instructions such as `vslidedown` and `vrgather` may read
+indices past `vl` or even VLMAX in source vector register groups. The
+general policy is to return the value 0 when the index is greater than
+or equal to VLMAX in the source vector register group.
+
+[[sec-vector-config]]
+=== Configuration-Setting Instructions (`vsetvli`/`vsetivli`/`vsetvl`)
+
+One of the common approaches to handling a large number of elements is
+"stripmining" where each iteration of a loop handles some number of elements,
+and the iterations continue until all elements have been processed. The RISC-V
+vector specification provides direct, portable support for this approach.
+The application specifies the total number of elements to be processed (the application vector length or AVL) as a
+candidate value for `vl`, and the hardware responds via a general-purpose
+register with the (frequently smaller) number of elements that the hardware
+will handle per iteration (stored in `vl`), based on the microarchitectural
+implementation and the `vtype` setting. A straightforward loop structure,
+shown in <<example-stripmine-sew>>, depicts the ease with which the code keeps
+track of the remaining number of elements and the amount per iteration handled
+by hardware.
+
+A set of instructions is provided to allow rapid configuration of the
+values in `vl` and `vtype` to match application needs. The
+`vset{i}vl{i}` instructions set the `vtype` and `vl` CSRs based on
+their arguments, and write the new value of `vl` into `rd`.
+
+----
+ vsetvli rd, rs1, vtypei # rd = new vl, rs1 = AVL, vtypei = new vtype setting
+ vsetivli rd, uimm, vtypei # rd = new vl, uimm = AVL, vtypei = new vtype setting
+ vsetvl rd, rs1, rs2 # rd = new vl, rs1 = AVL, rs2 = new vtype value
+----
+
+include::images/wavedrom/vcfg-format.adoc[]
+
+==== `vtype` encoding
+
+include::images/wavedrom/vtype-format.adoc[]
+
+The new `vtype` value is encoded in the immediate fields of `vsetvli`
+and `vsetivli`, and in the `rs2` register for `vsetvl`.
+
+----
+ Suggested assembler names used for vset{i}vli vtypei immediate
+
+ e8 # SEW=8b
+ e16 # SEW=16b
+ e32 # SEW=32b
+ e64 # SEW=64b
+
+ mf8 # LMUL=1/8
+ mf4 # LMUL=1/4
+ mf2 # LMUL=1/2
+ m1 # LMUL=1, assumed if m setting absent
+ m2 # LMUL=2
+ m4 # LMUL=4
+ m8 # LMUL=8
+
+Examples:
+ vsetvli t0, a0, e8, ta, ma # SEW= 8, LMUL=1
+ vsetvli t0, a0, e8, m2, ta, ma # SEW= 8, LMUL=2
+ vsetvli t0, a0, e32, mf2, ta, ma # SEW=32, LMUL=1/2
+----
+
+The `vsetvl` variant operates similarly to `vsetvli` except that it
+takes a `vtype` value from `rs2` and can be used for context restore.
+
+===== Unsupported `vtype` Values
+
+If the `vtype` value is not supported by the implementation, then
+the `vill` bit is set in `vtype`, the remaining bits in `vtype` are
+set to zero, and the `vl` register is also set to zero.
+
+NOTE: Earlier drafts required a trap when setting `vtype` to an
+illegal value. However, this would have added the first
+data-dependent trap on a CSR write to the ISA. Implementations could
+choose to trap when illegal values are written to `vtype` instead of
+setting `vill`, to allow emulation to support new configurations for
+forward-compatibility. The current scheme supports light-weight
+runtime interrogation of the supported vector unit configurations by
+checking if `vill` is clear for a given setting.
+
+A `vtype` value with `vill` set is treated as an unsupported
+configuration.
+
+Implementations must consider all bits of the `vtype` value to
+determine if the configuration is supported. An unsupported value in
+any location within the `vtype` value must result in `vill` being set.
+
+NOTE: In particular, all XLEN bits of the register `vtype` argument to
+the `vsetvl` instruction must be checked. Implementations cannot
+ignore fields they do not implement. All bits must be checked to
+ensure that new code assuming unsupported vector features in `vtype`
+traps instead of executing incorrectly on an older implementation.
+
+==== AVL encoding
+
+The new vector
+length setting is based on AVL, which for `vsetvli` and `vsetvl` is encoded in the `rs1` and `rd`
+fields as follows:
+
+.AVL used in `vsetvli` and `vsetvl` instructions
+[cols="2,2,10,10"]
+[%autowidth,float="center",align="center",options="header"]
+|===
+| `rd` | `rs1` | AVL value | Effect on `vl`
+| - | !x0 | Value in `x[rs1]` | Normal stripmining
+| !x0 | x0 | ~0 | Set `vl` to VLMAX
+| x0 | x0 | Value in `vl` register | Keep existing `vl` (of course, `vtype` may change)
+|===
+
+When `rs1` is not `x0`, the AVL is an unsigned integer held in the `x`
+register specified by `rs1`, and the new `vl` value is also written to
+the `x` register specified by `rd`.
+
+When `rs1=x0` but `rd!=x0`, the maximum unsigned integer value (`~0`)
+is used as the AVL, and the resulting VLMAX is written to `vl` and
+also to the `x` register specified by `rd`.
+
+When `rs1=x0` and `rd=x0`, the instruction operates as if the current
+vector length in `vl` is used as the AVL, and the resulting value is
+written to `vl`, but not to a destination register. This form can
+only be used when VLMAX and hence `vl` is not actually changed by the
+new SEW/LMUL ratio. Use of the instruction with a new SEW/LMUL ratio
+that would result in a change of VLMAX is reserved.
+Use of the instruction is also reserved if `vill` was 1 beforehand.
+Implementations may set `vill` in either case.
+
+NOTE: This last form of the instructions allows the `vtype` register to
+be changed while maintaining the current `vl`, provided VLMAX is not
+reduced. This design was chosen to ensure `vl` would always hold a
+legal value for the current `vtype` setting. The current `vl` value can
+be read from the `vl` CSR. The `vl` value could be reduced by this
+instruction if the new SEW/LMUL ratio causes VLMAX to shrink, and so
+this case has been reserved as it is not clear this is a generally
+useful operation, and implementations can otherwise assume `vl` is not
+changed by this instruction to optimize their microarchitecture.
+
+For the `vsetivli` instruction, the AVL is encoded as a 5-bit
+zero-extended immediate (0--31) in the `rs1` field.
+
+NOTE: The encoding of AVL for `vsetivli` is the same as for regular
+CSR immediate values.
+
+NOTE: The `vsetivli` instruction provides more compact code when the
+dimensions of vectors are small and known to fit inside the vector
+registers, in which case there is no stripmining overhead.
+
+==== Constraints on Setting `vl`
+
+The `vset{i}vl{i}` instructions first set VLMAX according to their `vtype`
+argument, then set `vl` obeying the following constraints:
+
+. `vl = AVL` if `AVL {le} VLMAX`
+. `ceil(AVL / 2) {le} vl {le} VLMAX` if `AVL < (2 * VLMAX)`
+. `vl = VLMAX` if `AVL {ge} (2 * VLMAX)`
+. Deterministic on any given implementation for same input AVL and VLMAX values
+. These specific properties follow from the prior rules:
+.. `vl = 0` if `AVL = 0`
+.. `vl > 0` if `AVL > 0`
+.. `vl {le} VLMAX`
+.. `vl {le} AVL`
+.. a value read from `vl` when used as the AVL argument to `vset{i}vl{i}` results in the same
+value in `vl`, provided the resultant VLMAX equals the value of VLMAX at the time that `vl` was read
+
+[NOTE]
+--
+The `vl` setting rules are designed to be sufficiently strict to
+preserve `vl` behavior across register spills and context swaps for
+`AVL {le} VLMAX`, yet flexible enough to enable implementations to improve
+vector lane utilization for `AVL > VLMAX`.
+
+For example, this permits an implementation to set `vl = ceil(AVL / 2)`
+for `VLMAX < AVL < 2*VLMAX` in order to evenly distribute work over the
+last two iterations of a stripmine loop.
+Requirement 2 ensures that the first stripmine iteration of reduction
+loops uses the largest vector length of all iterations, even in the case
+of `AVL < 2*VLMAX`.
+This allows software to avoid needing to explicitly calculate a running
+maximum of vector lengths observed during a stripmined loop.
+Requirement 2 also allows an implementation to set `vl` to VLMAX for `VLMAX < AVL < 2*VLMAX`.
+--
+
+[[example-stripmine-sew]]
+==== Example of stripmining and changes to SEW
+
+The SEW and LMUL settings can be changed dynamically to provide high
+throughput on mixed-width operations in a single loop.
+----
+# Example: Load 16-bit values, widen multiply to 32b, shift 32b result
+# right by 3, store 32b values.
+# On entry:
+# a0 holds the total number of elements to process
+# a1 holds the address of the source array
+# a2 holds the address of the destination array
+
+loop:
+ vsetvli a3, a0, e16, m4, ta, ma # vtype = 16-bit integer vectors;
+ # also update a3 with vl (# of elements this iteration)
+ vle16.v v4, (a1) # Get 16b vector
+ slli t1, a3, 1 # Multiply # elements this iteration by 2 bytes/source element
+ add a1, a1, t1 # Bump pointer
+ vwmul.vx v8, v4, x10 # Widening multiply into 32b in <v8--v15>
+
+ vsetvli x0, x0, e32, m8, ta, ma # Operate on 32b values
+ vsrl.vi v8, v8, 3
+ vse32.v v8, (a2) # Store vector of 32b elements
+ slli t1, a3, 2 # Multiply # elements this iteration by 4 bytes/destination element
+ add a2, a2, t1 # Bump pointer
+ sub a0, a0, a3 # Decrement count by vl
+ bnez a0, loop # Any more?
+----
+
+[[sec-vector-memory]]
+=== Vector Loads and Stores
+
+Vector loads and stores move values between vector registers and
+memory.
+Vector loads and stores can be masked, and they only access memory or raise
+exceptions for active elements.
+Masked vector loads do not update inactive elements in the destination vector
+register group, unless masked agnostic is specified (`vtype.vma`=1).
+All vector loads and stores may
+generate and accept a non-zero `vstart` value.
+
+==== Vector Load/Store Instruction Encoding
+
+Vector loads and stores are encoded within the scalar floating-point
+load and store major opcodes (LOAD-FP/STORE-FP). The vector load and
+store encodings repurpose a portion of the standard scalar
+floating-point load/store 12-bit immediate field to provide further
+vector instruction encoding, with bit 25 holding the standard vector
+mask bit (see <<sec-vector-mask-encoding>>).
+
+include::images/wavedrom/vmem-format.adoc[]
+
+[cols="4,12"]
+|===
+| Field | Description
+
+| rs1[4:0] | specifies x register holding base address
+| rs2[4:0] | specifies x register holding stride
+| vs2[4:0] | specifies v register holding address offsets
+| vs3[4:0] | specifies v register holding store data
+| vd[4:0] | specifies v register destination of load
+| vm | specifies whether vector masking is enabled (0 = mask enabled, 1 = mask disabled)
+| width[2:0] | specifies size of memory elements, and distinguishes from FP scalar
+| mew | extended memory element width. See <<sec-vector-loadstore-width-encoding>>
+| mop[1:0] | specifies memory addressing mode
+| nf[2:0] | specifies the number of fields in each segment, for segment load/stores
+| lumop[4:0]/sumop[4:0] | are additional fields encoding variants of unit-stride instructions
+|===
+
+Vector memory unit-stride and constant-stride operations directly
+encode EEW of the data to be transferred statically in the instruction
+to reduce the number of `vtype` changes when accessing memory in a
+mixed-width routine. Indexed operations use the explicit EEW encoding
+in the instruction to set the size of the indices used, and use
+SEW/LMUL to specify the data width.
+
+==== Vector Load/Store Addressing Modes
+
+The vector extension supports unit-stride, strided, and
+indexed (scatter/gather) addressing modes. Vector load/store base
+registers and strides are taken from the GPR `x` registers.
+
+The base effective address for all vector accesses is given by the
+contents of the `x` register named in `rs1`.
+
+Vector unit-stride operations access elements stored contiguously in
+memory starting from the base effective address.
+
+Vector constant-strided operations access the first memory element at the base
+effective address, and then access subsequent elements at address
+increments given by the byte offset contained in the `x` register
+specified by `rs2`.
+
+Vector indexed operations add the contents of each element of the
+vector offset operand specified by `vs2` to the base effective address
+to give the effective address of each element. The data vector
+register group has EEW=SEW, EMUL=LMUL, while the offset vector
+register group has EEW encoded in the instruction and
+EMUL=(EEW/SEW)*LMUL.
+
+The vector offset operand is treated as a vector of byte-address
+offsets.
+
+NOTE: The indexed operations can also be used to access fields within
+a vector of objects, where the `vs2` vector holds pointers to the base
+of the objects and the scalar `x` register holds the offset of the
+member field in each object. Supporting this case is why the indexed
+operations were not defined to scale the element indices by the data
+EEW.
+
+If the vector offset elements are narrower than XLEN, they are
+zero-extended to XLEN before adding to the base effective address. If
+the vector offset elements are wider than XLEN, the least-significant
+XLEN bits are used in the address calculation. An implementation must
+raise an illegal instruction exception if the EEW is not supported for
+offset elements.
+
+NOTE: A profile may place an upper limit on the maximum supported index
+EEW (e.g., only up to XLEN) smaller than ELEN.
+
+The vector addressing modes are encoded using the 2-bit `mop[1:0]`
+field.
+
+.encoding for loads
+[cols="1,1,7,6"]
+|===
+2+| mop [1:0] | Description | Opcodes
+
+| 0 | 0 | unit-stride | VLE<EEW>
+| 0 | 1 | indexed-unordered | VLUXEI<EEW>
+| 1 | 0 | strided | VLSE<EEW>
+| 1 | 1 | indexed-ordered | VLOXEI<EEW>
+|===
+
+.encoding for stores
+[cols="1,1,7,6"]
+|===
+2+| mop [1:0] | Description | Opcodes
+
+| 0 | 0 | unit-stride | VSE<EEW>
+| 0 | 1 | indexed-unordered | VSUXEI<EEW>
+| 1 | 0 | strided | VSSE<EEW>
+| 1 | 1 | indexed-ordered | VSOXEI<EEW>
+|===
+
+Vector unit-stride and constant-stride memory accesses do not
+guarantee ordering between individual element accesses. The vector
+indexed load and store memory operations have two forms, ordered and
+unordered. The indexed-ordered variants preserve element ordering on
+memory accesses.
+
+For unordered instructions (`mop[1:0]`!=11) there is no guarantee on
+element access order. If the accesses are to a strongly ordered IO
+region, the element accesses can be initiated in any order.
+
+NOTE: To provide ordered vector accesses to a strongly ordered IO
+region, the ordered indexed instructions should be used.
+
+For implementations with precise vector traps, exceptions on
+indexed-unordered stores must also be precise.
+
+Additional unit-stride vector addressing modes are encoded using the
+5-bit `lumop` and `sumop` fields in the unit-stride load and store
+instruction encodings respectively.
+
+.lumop
+[cols="1,1,1,1,1,11"]
+|===
+5+| lumop[4:0] | Description
+
+| 0 | 0 | 0 | 0 | 0 | unit-stride load
+| 0 | 1 | 0 | 0 | 0 | unit-stride, whole register load
+| 0 | 1 | 0 | 1 | 1 | unit-stride, mask load, EEW=8
+| 1 | 0 | 0 | 0 | 0 | unit-stride fault-only-first
+| x | x | x | x | x | other encodings reserved
+|===
+
+.sumop
+[cols="1,1,1,1,1,11"]
+|===
+5+| sumop[4:0] | Description
+
+| 0 | 0 | 0 | 0 | 0 | unit-stride store
+| 0 | 1 | 0 | 0 | 0 | unit-stride, whole register store
+| 0 | 1 | 0 | 1 | 1 | unit-stride, mask store, EEW=8
+| x | x | x | x | x | other encodings reserved
+|===
+
+The `nf[2:0]` field encodes the number of fields in each segment. For
+regular vector loads and stores, `nf`=0, indicating that a single
+value is moved between a vector register group and memory at each
+element position. Larger values in the `nf` field are used to access
+multiple contiguous fields within a segment as described below in
+Section <<sec-aos>>.
+
+The `nf[2:0]` field also encodes the number of whole vector registers
+to transfer for the whole vector register load/store instructions.
+
+[[sec-vector-loadstore-width-encoding]]
+==== Vector Load/Store Width Encoding
+
+Vector loads and stores have an EEW encoded directly in the
+instruction. The corresponding EMUL is calculated as EMUL =
+(EEW/SEW)*LMUL. If the EMUL would be out of range (EMUL>8 or
+EMUL<1/8), the instruction encoding is reserved. The vector register
+groups must have legal register specifiers for the selected EMUL,
+otherwise the instruction encoding is reserved.
+
+Vector unit-stride and constant-stride use the EEW/EMUL encoded in the
+instruction for the data values, while vector indexed loads and stores
+use the EEW/EMUL encoded in the instruction for the index values and
+the SEW/LMUL encoded in `vtype` for the data values.
+
+Vector loads and stores are encoded using width values that are not
+claimed by the standard scalar floating-point loads and stores.
+
+Implementations must provide vector loads and stores with EEWs
+corresponding to all supported SEW settings. Vector load/store
+encodings for unsupported EEW widths must raise an illegal
+instruction exception.
+
+.Width encoding for vector loads and stores.
+[cols="5,1,1,1,1,>3,>3,>3,3"]
+|===
+| | mew 3+| width [2:0] | Mem bits | Data Reg bits | Index bits | Opcodes
+
+| Standard scalar FP | x | 0 | 0 | 1 | 16| FLEN | - | FLH/FSH
+| Standard scalar FP | x | 0 | 1 | 0 | 32| FLEN | - | FLW/FSW
+| Standard scalar FP | x | 0 | 1 | 1 | 64| FLEN | - | FLD/FSD
+| Standard scalar FP | x | 1 | 0 | 0 | 128| FLEN | - | FLQ/FSQ
+| Vector 8b element | 0 | 0 | 0 | 0 | 8| 8 | - | VLxE8/VSxE8
+| Vector 16b element | 0 | 1 | 0 | 1 | 16| 16 | - | VLxE16/VSxE16
+| Vector 32b element | 0 | 1 | 1 | 0 | 32| 32 | - | VLxE32/VSxE32
+| Vector 64b element | 0 | 1 | 1 | 1 | 64| 64 | - | VLxE64/VSxE64
+| Vector 8b index | 0 | 0 | 0 | 0 | SEW | SEW | 8 | VLxEI8/VSxEI8
+| Vector 16b index | 0 | 1 | 0 | 1 | SEW | SEW | 16 | VLxEI16/VSxEI16
+| Vector 32b index | 0 | 1 | 1 | 0 | SEW | SEW | 32 | VLxEI32/VSxEI32
+| Vector 64b index | 0 | 1 | 1 | 1 | SEW | SEW | 64 | VLxEI64/VSxEI64
+| Reserved | 1 | X | X | X | - | - | - |
+|===
+
+Mem bits is the size of each element accessed in memory.
+
+Data reg bits is the size of each data element accessed in register.
+
+Index bits is the size of each index accessed in register.
+
+The `mew` bit (`inst[28]`) when set is expected to be used to encode
+expanded memory sizes of 128 bits and above, but these encodings are
+currently reserved.
+
+==== Vector Unit-Stride Instructions
+
+----
+ # Vector unit-stride loads and stores
+
+ # vd destination, rs1 base address, vm is mask encoding (v0.t or <missing>)
+ vle8.v vd, (rs1), vm # 8-bit unit-stride load
+ vle16.v vd, (rs1), vm # 16-bit unit-stride load
+ vle32.v vd, (rs1), vm # 32-bit unit-stride load
+ vle64.v vd, (rs1), vm # 64-bit unit-stride load
+
+ # vs3 store data, rs1 base address, vm is mask encoding (v0.t or <missing>)
+ vse8.v vs3, (rs1), vm # 8-bit unit-stride store
+ vse16.v vs3, (rs1), vm # 16-bit unit-stride store
+ vse32.v vs3, (rs1), vm # 32-bit unit-stride store
+ vse64.v vs3, (rs1), vm # 64-bit unit-stride store
+----
+
+Additional unit-stride mask load and store instructions are
+provided to transfer mask values to/from memory. These
+operate similarly to unmasked byte loads or stores (EEW=8), except that
+the effective vector length is ``evl``=ceil(``vl``/8) (i.e. EMUL=1),
+and the destination register is always written with a tail-agnostic
+policy.
+
+----
+ # Vector unit-stride mask load
+ vlm.v vd, (rs1) # Load byte vector of length ceil(vl/8)
+
+ # Vector unit-stride mask store
+ vsm.v vs3, (rs1) # Store byte vector of length ceil(vl/8)
+----
+
+`vlm.v` and `vsm.v` are encoded with the same `width[2:0]`=0 encoding as
+`vle8.v` and `vse8.v`, but are distinguished by different
+`lumop` and `sumop` encodings. Since `vlm.v` and `vsm.v` operate as byte loads and stores,
+`vstart` is in units of bytes for these instructions.
+
+NOTE: `vlm.v` and `vsm.v` respect the `vill` field in `vtype`, as
+they depend on `vtype` indirectly through its constraints on `vl`.
+
+NOTE: The previous assembler mnemonics `vle1.v` and `vse1.v` were
+confusing as length was handled differently for these instructions
+versus other element load/store instructions. To avoid software
+churn, these older assembly mnemonics are being retained as aliases.
+
+NOTE: The primary motivation to provide mask load and store is to
+support machines that internally rearrange data to reduce
+cross-datapath wiring. However, these instructions also provide a convenient
+mechanism to use packed bit vectors in memory as mask values,
+and also reduce the cost of mask spill/fill by reducing the need to change
+`vl`.
+
+==== Vector Strided Instructions
+
+----
+ # Vector strided loads and stores
+
+ # vd destination, rs1 base address, rs2 byte stride
+ vlse8.v vd, (rs1), rs2, vm # 8-bit strided load
+ vlse16.v vd, (rs1), rs2, vm # 16-bit strided load
+ vlse32.v vd, (rs1), rs2, vm # 32-bit strided load
+ vlse64.v vd, (rs1), rs2, vm # 64-bit strided load
+
+ # vs3 store data, rs1 base address, rs2 byte stride
+ vsse8.v vs3, (rs1), rs2, vm # 8-bit strided store
+ vsse16.v vs3, (rs1), rs2, vm # 16-bit strided store
+ vsse32.v vs3, (rs1), rs2, vm # 32-bit strided store
+ vsse64.v vs3, (rs1), rs2, vm # 64-bit strided store
+----
+
+Negative and zero strides are supported.
+
+Element accesses within a strided instruction are unordered with
+respect to each other.
+
+When `rs2`=`x0`, then an implementation is allowed, but not required,
+to perform fewer memory operations than the number of active elements,
+and may perform different numbers of memory operations across
+different dynamic executions of the same static instruction.
+
+NOTE: Compilers must take care not to use the `x0` form for `rs2` when
+the immediate stride is `0` if the intent is to require that all memory
+accesses are performed.
+
+When `rs2!=x0` and the value of `x[rs2]=0`, the implementation must
+perform one memory access for each active element (but these accesses
+will not be ordered).
+
+NOTE: As with other architectural mandates, implementations must
+_appear_ to perform each memory access. Microarchitectures are
+free to optimize away accesses that would not be observed by another
+agent, for example, in idempotent memory regions obeying RVWMO. For
+non-idempotent memory regions, where by definition each access can be
+observed by a device, the optimization would not be possible.
+
+NOTE: When repeated ordered vector accesses to the same memory
+address are required, an ordered indexed operation can be used.
+
+==== Vector Indexed Instructions
+
+----
+ # Vector indexed loads and stores
+
+ # Vector indexed-unordered load instructions
+ # vd destination, rs1 base address, vs2 byte offsets
+ vluxei8.v vd, (rs1), vs2, vm # unordered 8-bit indexed load of SEW data
+ vluxei16.v vd, (rs1), vs2, vm # unordered 16-bit indexed load of SEW data
+ vluxei32.v vd, (rs1), vs2, vm # unordered 32-bit indexed load of SEW data
+ vluxei64.v vd, (rs1), vs2, vm # unordered 64-bit indexed load of SEW data
+
+ # Vector indexed-ordered load instructions
+ # vd destination, rs1 base address, vs2 byte offsets
+ vloxei8.v vd, (rs1), vs2, vm # ordered 8-bit indexed load of SEW data
+ vloxei16.v vd, (rs1), vs2, vm # ordered 16-bit indexed load of SEW data
+ vloxei32.v vd, (rs1), vs2, vm # ordered 32-bit indexed load of SEW data
+ vloxei64.v vd, (rs1), vs2, vm # ordered 64-bit indexed load of SEW data
+
+ # Vector indexed-unordered store instructions
+ # vs3 store data, rs1 base address, vs2 byte offsets
+ vsuxei8.v vs3, (rs1), vs2, vm # unordered 8-bit indexed store of SEW data
+ vsuxei16.v vs3, (rs1), vs2, vm # unordered 16-bit indexed store of SEW data
+ vsuxei32.v vs3, (rs1), vs2, vm # unordered 32-bit indexed store of SEW data
+ vsuxei64.v vs3, (rs1), vs2, vm # unordered 64-bit indexed store of SEW data
+
+ # Vector indexed-ordered store instructions
+ # vs3 store data, rs1 base address, vs2 byte offsets
+ vsoxei8.v vs3, (rs1), vs2, vm # ordered 8-bit indexed store of SEW data
+ vsoxei16.v vs3, (rs1), vs2, vm # ordered 16-bit indexed store of SEW data
+ vsoxei32.v vs3, (rs1), vs2, vm # ordered 32-bit indexed store of SEW data
+ vsoxei64.v vs3, (rs1), vs2, vm # ordered 64-bit indexed store of SEW data
+
+----
+
+NOTE: The assembler syntax for indexed loads and stores uses
+``ei``__x__ instead of ``e``__x__ to indicate the statically encoded EEW
+is of the index not the data.
+
+NOTE: The indexed operation mnemonics have a "U" or "O" to
+distinguish between unordered and ordered, while the other vector
+addressing modes have no character. While this is perhaps a little
+less consistent, this approach minimizes disruption to existing
+software, as VSXEI previously meant "ordered" - and the opcode can be
+retained as an alias during transition to help reduce software churn.
+
+==== Unit-stride Fault-Only-First Loads
+
+The unit-stride fault-only-first load instructions are used to
+vectorize loops with data-dependent exit conditions ("while" loops).
+These instructions execute as a regular load except that they will
+only take a trap caused by a synchronous exception on element 0. If
+element 0 raises an exception, `vl` is not modified, and the trap is
+taken. If an element > 0 raises an exception, the corresponding trap
+is not taken, and the vector length `vl` is reduced to the index of
+the element that would have raised an exception.
+
+Load instructions may overwrite active destination vector register
+group elements past the element index at which the trap is reported.
+Similarly, fault-only-first load instructions may update active destination
+elements past the element that causes trimming of the vector length
+(but not past the original vector length). The values of these
+spurious updates do not have to correspond to the values in memory at
+the addressed memory locations. Non-idempotent memory locations can
+only be accessed when it is known the corresponding element load
+operation will not be restarted due to a trap or vector-length
+trimming.
+
+----
+ # Vector unit-stride fault-only-first loads
+
+ # vd destination, rs1 base address, vm is mask encoding (v0.t or <missing>)
+ vle8ff.v vd, (rs1), vm # 8-bit unit-stride fault-only-first load
+ vle16ff.v vd, (rs1), vm # 16-bit unit-stride fault-only-first load
+ vle32ff.v vd, (rs1), vm # 32-bit unit-stride fault-only-first load
+ vle64ff.v vd, (rs1), vm # 64-bit unit-stride fault-only-first load
+----
+
+----
+strlen example using unit-stride fault-only-first instruction
+
+include::example/strlen.s[lines=4..-1]
+----
+
+NOTE: There is a security concern with fault-only-first loads, as they
+can be used to probe for valid effective addresses. The unit-stride
+versions only allow probing a region immediately contiguous to a known
+region, and so reduce the security impact when used in unprivileged
+code. However, code running in S-mode can establish arbitrary page
+translations that allow probing of random guest physical addresses
+provided by a hypervisor. Strided and scatter/gather fault-only-first
+instructions are not provided due to lack of encoding space, but they
+can also represent a larger security hole, allowing even unprivileged
+software to easily check multiple random pages for accessibility
+without experiencing a trap. This standard does not address possible
+security mitigations for fault-only-first instructions.
+
+Even when an exception is not raised, implementations are permitted to process
+fewer than `vl` elements and reduce `vl` accordingly, but if `vstart`=0 and
+`vl`>0, then at least one element must be processed.
+
+When the fault-only-first instruction takes a trap due to an
+interrupt, implementations should not reduce `vl` and should instead
+set a `vstart` value.
+
+NOTE: When the fault-only-first instruction would trigger a debug
+data-watchpoint trap on an element after the first, implementations
+should not reduce `vl` but instead should trigger the debug trap as
+otherwise the event might be lost.
+
+[[sec-aos]]
+==== Vector Load/Store Segment Instructions
+
+The vector load/store segment instructions move multiple contiguous
+fields in memory to and from consecutively numbered vector registers.
+
+NOTE: The name "segment" reflects that the items moved are subarrays
+with homogeneous elements. These operations can be used to transpose
+arrays between memory and registers, and can support operations on
+"array-of-structures" datatypes by unpacking each field in a structure
+into a separate vector register.
+
+The three-bit `nf` field in the vector instruction encoding is an
+unsigned integer that contains one less than the number of fields per
+segment, _NFIELDS_.
+
+[[fig-nf]]
+.NFIELDS Encoding
+[cols="1,1,1,13"]
+|===
+3+| nf[2:0] | NFIELDS
+
+| 0 | 0 | 0 | 1
+| 0 | 0 | 1 | 2
+| 0 | 1 | 0 | 3
+| 0 | 1 | 1 | 4
+| 1 | 0 | 0 | 5
+| 1 | 0 | 1 | 6
+| 1 | 1 | 0 | 7
+| 1 | 1 | 1 | 8
+|===
+
+The EMUL setting must be such that EMUL * NFIELDS {le} 8, otherwise
+the instruction encoding is reserved.
+
+NOTE: The product ceil(EMUL) * NFIELDS represents the number of underlying
+vector registers that will be touched by a segmented load or store
+instruction. This constraint makes this total no larger than 1/4 of
+the architectural register file, and the same as for regular
+operations with EMUL=8.
+
+Each field will be held in successively numbered vector register
+groups. When EMUL>1, each field will occupy a vector register group
+held in multiple successively numbered vector registers, and the
+vector register group for each field must follow the usual vector
+register alignment constraints (e.g., when EMUL=2 and NFIELDS=4, each
+field's vector register group must start at an even vector register,
+but does not have to start at a vector register number that is a multiple of 8).
+
+If the vector register numbers accessed by the segment load or store
+would increment past 31, then the instruction encoding is reserved.
+
+NOTE: This constraint is to help allow for forward-compatibility with
+a possible future longer instruction encoding that has more
+addressable vector registers.
+
+The `vl` register gives the number of segments to move, which is
+equal to the number of elements transferred to each vector register
+group. Masking is also applied at the level of whole segments.
+
+For segment loads and stores, the individual memory accesses used to
+access fields within each segment are unordered with respect to each
+other even for ordered indexed segment loads and stores.
+
+The `vstart` value is in units of whole segments. If a trap occurs during
+access to a segment, it is implementation-defined whether a subset
+of the faulting segment's accesses are performed before the trap is taken.
+
+===== Vector Unit-Stride Segment Loads and Stores
+
+The vector unit-stride load and store segment instructions move packed
+contiguous segments into multiple destination vector register groups.
+
+NOTE: Where the segments hold structures with heterogeneous-sized
+fields, software can later unpack individual structure fields using
+additional instructions after the segment load brings data into the
+vector registers.
+
+The assembler prefixes `vlseg`/`vsseg` are used for unit-stride
+segment loads and stores respectively.
+
+----
+ # Format
+ vlseg<nf>e<eew>.v vd, (rs1), vm # Unit-stride segment load template
+ vsseg<nf>e<eew>.v vs3, (rs1), vm # Unit-stride segment store template
+
+ # Examples
+ vlseg8e8.v vd, (rs1), vm # Load eight vector registers with eight byte fields.
+
+ vsseg3e32.v vs3, (rs1), vm # Store packed vector of 3*4-byte segments from vs3,vs3+1,vs3+2 to memory
+----
+
+For loads, the `vd` register will hold the first field loaded from the
+segment. For stores, the `vs3` register is read to provide the first
+field to be stored to each segment.
+
+----
+ # Example 1
+ # Memory structure holds packed RGB pixels (24-bit data structure, 8bpp)
+ vsetvli a1, t0, e8, ta, ma
+ vlseg3e8.v v8, (a0), vm
+ # v8 holds the red pixels
+ # v9 holds the green pixels
+ # v10 holds the blue pixels
+
+ # Example 2
+ # Memory structure holds complex values, 32b for real and 32b for imaginary
+ vsetvli a1, t0, e32, ta, ma
+ vlseg2e32.v v8, (a0), vm
+ # v8 holds real
+ # v9 holds imaginary
+----
+
+There are also fault-only-first versions of the unit-stride instructions.
+
+----
+ # Template for vector fault-only-first unit-stride segment loads.
+ vlseg<nf>e<eew>ff.v vd, (rs1), vm # Unit-stride fault-only-first segment loads
+----
+
+For fault-only-first segment loads, if an exception is detected partway
+through accessing a segment, regardless of whether the element index is zero,
+it is implementation-defined whether a subset of the segment is loaded.
+
+These instructions may overwrite destination vector register group
+elements past the point at which a trap is reported or past the point
+at which vector length is trimmed.
+
+===== Vector Strided Segment Loads and Stores
+
+Vector strided segment loads and stores move contiguous segments where
+each segment is separated by the byte-stride offset given in the `rs2`
+GPR argument.
+
+NOTE: Negative and zero strides are supported.
+
+----
+ # Format
+ vlsseg<nf>e<eew>.v vd, (rs1), rs2, vm # Strided segment loads
+ vssseg<nf>e<eew>.v vs3, (rs1), rs2, vm # Strided segment stores
+
+ # Examples
+ vsetvli a1, t0, e8, ta, ma
+ vlsseg3e8.v v4, (x5), x6 # Load bytes at addresses x5+i*x6 into v4[i],
+ # and bytes at addresses x5+i*x6+1 into v5[i],
+ # and bytes at addresses x5+i*x6+2 into v6[i].
+
+ # Examples
+ vsetvli a1, t0, e32, ta, ma
+ vssseg2e32.v v2, (x5), x6 # Store words from v2[i] to address x5+i*x6
+ # and words from v3[i] to address x5+i*x6+4
+----
+
+Accesses to the fields within each segment can occur in any order,
+including the case where the byte stride is such that segments overlap
+in memory.
+
+===== Vector Indexed Segment Loads and Stores
+
+Vector indexed segment loads and stores move contiguous segments where
+each segment is located at an address given by adding the scalar base
+address in the `rs1` field to byte offsets in vector register `vs2`.
+Both ordered and unordered forms are provided, where the ordered forms
+access segments in element order. However, even for the ordered form,
+accesses to the fields within an individual segment are not ordered
+with respect to each other.
+
+The data vector register group has EEW=SEW, EMUL=LMUL, while the index
+vector register group has EEW encoded in the instruction with
+EMUL=(EEW/SEW)*LMUL.
+The EMUL * NFIELDS {le} 8 constraint applies to the data vector register group.
+
+----
+ # Format
+ vluxseg<nf>ei<eew>.v vd, (rs1), vs2, vm # Indexed-unordered segment loads
+ vloxseg<nf>ei<eew>.v vd, (rs1), vs2, vm # Indexed-ordered segment loads
+ vsuxseg<nf>ei<eew>.v vs3, (rs1), vs2, vm # Indexed-unordered segment stores
+ vsoxseg<nf>ei<eew>.v vs3, (rs1), vs2, vm # Indexed-ordered segment stores
+
+ # Examples
+ vsetvli a1, t0, e8, ta, ma
+ vluxseg3ei8.v v4, (x5), v3 # Load bytes at addresses x5+v3[i] into v4[i],
+ # and bytes at addresses x5+v3[i]+1 into v5[i],
+ # and bytes at addresses x5+v3[i]+2 into v6[i].
+
+ # Examples
+ vsetvli a1, t0, e32, ta, ma
+ vsuxseg2ei32.v v2, (x5), v5 # Store words from v2[i] to address x5+v5[i]
+ # and words from v3[i] to address x5+v5[i]+4
+----
+
+For vector indexed segment loads, the destination vector register
+groups cannot overlap the source vector register group (specified by
+`vs2`), else the instruction encoding is reserved.
+
+NOTE: This constraint supports restart of indexed segment loads
+that raise exceptions partway through loading a structure.
+
+==== Vector Load/Store Whole Register Instructions
+
+Format for Vector Load Whole Register Instructions under LOAD-FP major opcode
+
+////
+31 29 28 27 26 25 24 20 19 15 14 12 11 7 6 0
+ nf | mew| 00 | 1| 01000 | rs1 | width | vd |0000111| VL<nf>R
+////
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x07, attr: 'VL*R*'},
+ {bits: 5, name: 'vd', attr: 'destination of load', type: 2},
+ {bits: 3, name: 'width'},
+ {bits: 5, name: 'rs1', attr: 'base address', type: 4},
+ {bits: 5, name: 8, attr: 'lumop'},
+ {bits: 1, name: 1, attr: 'vm'},
+ {bits: 2, name: 0x10000, attr: 'mop'},
+ {bits: 1, name: 'mew'},
+ {bits: 3, name: 'nf'},
+]}
+....
+
+Format for Vector Store Whole Register Instructions under STORE-FP major opcode
+
+////
+31 29 28 27 26 25 24 20 19 15 14 12 11 7 6 0
+ nf | 0 | 00 | 1| 01000 | rs1 | 000 | vs3 |0100111| VS<nf>R
+////
+
+[wavedrom,,svg]
+....
+{reg: [
+ {bits: 7, name: 0x27, attr: 'VS*R*'},
+ {bits: 5, name: 'vs3', attr: 'store data', type: 2},
+ {bits: 3, name: 0x1000},
+ {bits: 5, name: 'rs1', attr: 'base address', type: 4},
+ {bits: 5, name: 8, attr: 'sumop'},
+ {bits: 1, name: 1, attr: 'vm'},
+ {bits: 2, name: 0x100, attr: 'mop'},
+ {bits: 1, name: 0x100, attr: 'mew'},
+ {bits: 3, name: 'nf'},
+]}
+....
+
+These instructions load and store whole vector register groups.
+
+NOTE: These instructions are intended to be used to save and restore
+vector registers when the type or length of the current contents of
+the vector register is not known, or where modifying `vl` and `vtype`
+would be costly. Examples include compiler register spills, vector
+function calls where values are passed in vector registers, interrupt
+handlers, and OS context switches. Software can determine the number
+of bytes transferred by reading the `vlenb` register.
+
+The load instructions have an EEW encoded in the `mew` and `width`
+fields following the pattern of regular unit-stride loads.
+
+NOTE: Because in-register byte layouts are identical to in-memory byte
+layouts, the same data is written to the destination register group
+regardless of EEW.
+Hence, it would have sufficed to provide only EEW=8 variants.
+The full set of EEW variants is provided so that the encoded EEW can be used
+as a hint to indicate the destination register group will next be accessed
+with this EEW, which aids implementations that rearrange data internally.
+
+The vector whole register store instructions are encoded similarly to
+unmasked unit-stride stores of elements with EEW=8.
+
+The `nf` field encodes how many vector registers to load and store using the NFIELDS encoding (Figure <<fig-nf>>).
+The encoded number of registers must be a power of 2 and the vector
+register numbers must be aligned as with a vector register group,
+otherwise the instruction encoding is reserved. NFIELDS
+indicates the number of vector registers to transfer, numbered
+successively after the base. Only NFIELDS values of 1, 2, 4, 8 are
+supported, with other values reserved. When multiple registers are
+transferred, the lowest-numbered vector register is held in the
+lowest-numbered memory addresses and successive vector register
+numbers are placed contiguously in memory.
+
+The instructions operate with an effective vector length,
+`evl`=NFIELDS*VLEN/EEW, regardless of current settings in `vtype` and
+`vl`. The usual property that no elements are written if `vstart`
+{ge} `vl` does not apply to these instructions. Instead, no elements
+are written if `vstart` {ge} `evl`.
+
+The instructions operate similarly to unmasked unit-stride load and
+store instructions, with the base address passed in the scalar `x`
+register specified by `rs1`.
+
+Implementations are allowed to raise a misaligned address exception on
+whole register loads and stores if the base address is not naturally
+aligned to the larger of the size of the encoded EEW in bytes (EEW/8)
+or the implementation's smallest supported SEW size in bytes
+(SEW~MIN~/8).
+
+NOTE: Allowing misaligned exceptions to be raised based on
+non-alignment to the encoded EEW simplifies the implementation of these
+instructions. Some subset implementations might not support smaller
+SEW widths, so are allowed to report misaligned exceptions for the
+smallest supported SEW even if larger than encoded EEW. An extreme
+non-standard implementation might have SEW~MIN~>XLEN for example. Software
+environments can mandate the minimum alignment requirements to support
+an ABI.
+
+----
+ # Format of whole register load and store instructions.
+ vl1r.v v3, (a0) # Pseudoinstruction equal to vl1re8.v
+
+ vl1re8.v v3, (a0) # Load v3 with VLEN/8 bytes held at address in a0
+ vl1re16.v v3, (a0) # Load v3 with VLEN/16 halfwords held at address in a0
+ vl1re32.v v3, (a0) # Load v3 with VLEN/32 words held at address in a0
+ vl1re64.v v3, (a0) # Load v3 with VLEN/64 doublewords held at address in a0
+
+ vl2r.v v2, (a0) # Pseudoinstruction equal to vl2re8.v
+
+ vl2re8.v v2, (a0) # Load v2-v3 with 2*VLEN/8 bytes from address in a0
+ vl2re16.v v2, (a0) # Load v2-v3 with 2*VLEN/16 halfwords held at address in a0
+ vl2re32.v v2, (a0) # Load v2-v3 with 2*VLEN/32 words held at address in a0
+ vl2re64.v v2, (a0) # Load v2-v3 with 2*VLEN/64 doublewords held at address in a0
+
+ vl4r.v v4, (a0) # Pseudoinstruction equal to vl4re8.v
+
+ vl4re8.v v4, (a0) # Load v4-v7 with 4*VLEN/8 bytes from address in a0
+ vl4re16.v v4, (a0)
+ vl4re32.v v4, (a0)
+ vl4re64.v v4, (a0)
+
+ vl8r.v v8, (a0) # Pseudoinstruction equal to vl8re8.v
+
+ vl8re8.v v8, (a0) # Load v8-v15 with 8*VLEN/8 bytes from address in a0
+ vl8re16.v v8, (a0)
+ vl8re32.v v8, (a0)
+ vl8re64.v v8, (a0)
+
+ vs1r.v v3, (a1) # Store v3 to address in a1
+ vs2r.v v2, (a1) # Store v2-v3 to address in a1
+ vs4r.v v4, (a1) # Store v4-v7 to address in a1
+ vs8r.v v8, (a1) # Store v8-v15 to address in a1
+----
+
+NOTE: Implementations should raise illegal instruction exceptions on
+`vl<nf>r` instructions for EEW values that are not supported.
+
+NOTE: We have considered adding a whole register mask load instruction
+(`vl1rm.v`) but have decided to omit it from the initial extension. The
+primary purpose would be to inform the microarchitecture that the data
+will be used as a mask. The same effect can be achieved with the
+following code sequence, whose cost is at most four instructions. Of
+these, the first could likely be removed as `vl` is often already
+in a scalar register, and the last might already be present if the
+following vector instruction needs a new SEW/LMUL. So, in the best case
+only two instructions (of which only one performs vector operations) are needed to synthesize the effect of the
+dedicated instruction:
+----
+ csrr t0, vl # Save current vl (potentially not needed)
+ vsetvli t1, x0, e8, m8, ta, ma # Maximum VLMAX
+ vlm.v v0, (a0) # Load mask register
+ vsetvli x0, t0, <new type> # Restore vl (potentially already present)
+----
+
+=== Vector Memory Alignment Constraints
+
+If an element accessed by a vector memory instruction is not naturally
+aligned to the size of the element, either the element is transferred
+successfully or an address misaligned exception is raised on that
+element.
+
+Support for misaligned vector memory accesses is independent of an
+implementation's support for misaligned scalar memory accesses.
+
+NOTE: An implementation may have neither, one, or both scalar and
+vector memory accesses support some or all misaligned accesses in
+hardware. A separate PMA should be defined to determine if vector
+misaligned accesses are supported in the associated address range.
+
+Vector misaligned memory accesses follow the same rules for atomicity
+as scalar misaligned memory accesses.
+
+=== Vector Memory Consistency Model
+
+Vector memory instructions appear to execute in program order on the
+local hart.
+
+Vector memory instructions follow RVWMO at the instruction level.
+If the Ztso extension is implemented, vector memory instructions additionally
+follow RVTSO at the instruction level.
+
+Except for vector indexed-ordered loads and stores, element operations
+are unordered within the instruction.
+
+Vector indexed-ordered loads and stores read and write elements
+from/to memory in element order respectively,
+obeying RVWMO at the element level.
+
+NOTE: Ztso only imposes RVTSO at the instruction level; intra-instruction
+ordering follows RVWMO regardless of whether Ztso is implemented.
+
+NOTE: More formal definitions required.
+
+Instructions affected by the vector length register `vl` have a control
+dependency on `vl`, rather than a data dependency.
+Similarly, masked vector instructions have a control dependency on the source
+mask register, rather than a data dependency.
+
+NOTE: Treating the vector length and mask as control rather than data
+typically matches the semantics of the corresponding scalar code, where branch
+instructions ordinarily would have been used.
+Treating the mask as control allows masked vector load instructions to access
+memory before the mask value is known, without the need for
+a misspeculation-recovery mechanism.
+
+=== Vector Arithmetic Instruction Formats
+
+The vector arithmetic instructions use a new major opcode (OP-V =
+1010111~2~) which neighbors OP-FP. The three-bit `funct3` field is
+used to define sub-categories of vector instructions.
+
+include::images/wavedrom/valu-format.adoc[]
+
+[[sec-arithmetic-encoding]]
+==== Vector Arithmetic Instruction encoding
+
+The `funct3` field encodes the operand type and source locations.
+
+.funct3
+[cols="1,1,1,3,5,5"]
+|===
+3+| funct3[2:0] | Category | Operands | Type of scalar operand
+
+| 0 | 0 | 0 | OPIVV | vector-vector | N/A
+| 0 | 0 | 1 | OPFVV | vector-vector | N/A
+| 0 | 1 | 0 | OPMVV | vector-vector | N/A
+| 0 | 1 | 1 | OPIVI | vector-immediate | `imm[4:0]`
+| 1 | 0 | 0 | OPIVX | vector-scalar | GPR `x` register `rs1`
+| 1 | 0 | 1 | OPFVF | vector-scalar | FP `f` register `rs1`
+| 1 | 1 | 0 | OPMVX | vector-scalar | GPR `x` register `rs1`
+| 1 | 1 | 1 | OPCFG | scalars-imms | GPR `x` register `rs1` & `rs2`/`imm`
+|===
+
+Integer operations are performed using unsigned or two's-complement
+signed integer arithmetic depending on the opcode.
+
+NOTE: In this discussion, fixed-point operations are
+considered to be integer operations.
+
+All standard vector floating-point arithmetic operations follow the
+IEEE-754/2008 standard. All vector floating-point operations use the
+dynamic rounding mode in the `frm` register. Use of the `frm` field
+when it contains an invalid rounding mode by any vector floating-point
+instruction--even those that do not depend on the rounding mode, or
+when `vl`=0, or when `vstart` {ge} `vl`--is reserved.
+
+NOTE: All vector floating-point code will rely on a valid value in
+`frm`. Implementations can make all vector FP instructions report
+exceptions when the rounding mode is invalid to simplify control
+logic.
+
+Vector-vector operations take two vectors of operands from vector
+register groups specified by `vs2` and `vs1` respectively.
+
+Vector-scalar operations can have three possible forms. In all three forms,
+the vector register group operand is specified by `vs2`. The second
+scalar source operand comes from one of three alternative sources:
+
+. For integer operations, the scalar can be a 5-bit immediate, `imm[4:0]`, encoded
+in the `rs1` field. The value is sign-extended to SEW bits, unless
+otherwise specified.
+
+. For integer operations, the scalar can be taken from the scalar `x`
+register specified by `rs1`. If XLEN>SEW, the least-significant SEW
+bits of the `x` register are used, unless otherwise specified. If
+XLEN<SEW, the value from the `x` register is sign-extended to SEW
+bits.
+
+. For floating-point operations, the scalar can be taken from a scalar
+`f` register. If FLEN > SEW, the value in the `f` register is
+checked for a valid NaN-boxed value, in which case the
+least-significant SEW bits of the `f` register are used, else the
+canonical NaN value is used. Vector instructions where any
+floating-point vector operand's EEW is not a supported floating-point
+type width (which includes when FLEN < SEW) are reserved.
+
+NOTE: Some instructions _zero_-extend the 5-bit immediate, and denote this
+by naming the immediate `uimm` in the assembly syntax.
+
+NOTE: When adding a vector extension to the Zfinx/Zdinx/Zhinx
+extensions, floating-point scalar arguments are taken from the `x`
+registers. NaN-boxing is not supported in these extensions, and so
+the vector floating-point scalar value is produced using the same
+rules as for an integer scalar operand (i.e., when XLEN > SEW use the
+lowest SEW bits, when XLEN < SEW use the sign-extended value).
+
+Vector arithmetic instructions are masked under control of the `vm`
+field.
+
+----
+# Assembly syntax pattern for vector binary arithmetic instructions
+
+# Operations returning vector results, masked by vm (v0.t, <nothing>)
+vop.vv vd, vs2, vs1, vm # integer vector-vector vd[i] = vs2[i] op vs1[i]
+vop.vx vd, vs2, rs1, vm # integer vector-scalar vd[i] = vs2[i] op x[rs1]
+vop.vi vd, vs2, imm, vm # integer vector-immediate vd[i] = vs2[i] op imm
+
+vfop.vv vd, vs2, vs1, vm # FP vector-vector operation vd[i] = vs2[i] fop vs1[i]
+vfop.vf vd, vs2, rs1, vm # FP vector-scalar operation vd[i] = vs2[i] fop f[rs1]
+----
+
+NOTE: In the encoding, `vs2` is the first operand, while `rs1/imm`
+is the second operand. This is the opposite to the standard scalar
+ordering. This arrangement retains the existing encoding conventions
+that instructions that read only one scalar register, read it from
+`rs1`, and that 5-bit immediates are sourced from the `rs1` field.
+
+----
+# Assembly syntax pattern for vector ternary arithmetic instructions (multiply-add)
+
+# Integer operations overwriting sum input
+vop.vv vd, vs1, vs2, vm # vd[i] = vs1[i] * vs2[i] + vd[i]
+vop.vx vd, rs1, vs2, vm # vd[i] = x[rs1] * vs2[i] + vd[i]
+
+# Integer operations overwriting product input
+vop.vv vd, vs1, vs2, vm # vd[i] = vs1[i] * vd[i] + vs2[i]
+vop.vx vd, rs1, vs2, vm # vd[i] = x[rs1] * vd[i] + vs2[i]
+
+# Floating-point operations overwriting sum input
+vfop.vv vd, vs1, vs2, vm # vd[i] = vs1[i] * vs2[i] + vd[i]
+vfop.vf vd, rs1, vs2, vm # vd[i] = f[rs1] * vs2[i] + vd[i]
+
+# Floating-point operations overwriting product input
+vfop.vv vd, vs1, vs2, vm # vd[i] = vs1[i] * vd[i] + vs2[i]
+vfop.vf vd, rs1, vs2, vm # vd[i] = f[rs1] * vd[i] + vs2[i]
+----
+
+NOTE: For ternary multiply-add operations, the assembler syntax always
+places the destination vector register first, followed by either `rs1`
+or `vs1`, then `vs2`. This ordering provides a more natural reading
+of the assembler for these ternary operations, as the multiply
+operands are always next to each other.
+
+[[sec-widening]]
+==== Widening Vector Arithmetic Instructions
+
+A few vector arithmetic instructions are defined to be __widening__
+operations where the destination vector register group has EEW=2*SEW
+and EMUL=2*LMUL. These are generally given a `vw*` prefix on the
+opcode, or `vfw*` for vector floating-point instructions.
+
+The first vector register group operand can be either single or
+double-width.
+
+----
+# Assembly syntax pattern for vector widening arithmetic instructions
+
+# Double-width result, two single-width sources: 2*SEW = SEW op SEW
+vwop.vv vd, vs2, vs1, vm # integer vector-vector vd[i] = vs2[i] op vs1[i]
+vwop.vx vd, vs2, rs1, vm # integer vector-scalar vd[i] = vs2[i] op x[rs1]
+
+# Double-width result, first source double-width, second source single-width: 2*SEW = 2*SEW op SEW
+vwop.wv vd, vs2, vs1, vm # integer vector-vector vd[i] = vs2[i] op vs1[i]
+vwop.wx vd, vs2, rs1, vm # integer vector-scalar vd[i] = vs2[i] op x[rs1]
+----
+
+NOTE: Originally, a `w` suffix was used on the opcode, but this could be
+confused with the use of a `w` suffix to mean word-sized operations in
+doubleword integers, so the `w` was moved to a prefix.
+
+NOTE: The floating-point widening operations were changed to `vfw*`
+from `vwf*` to be more consistent with any scalar widening
+floating-point operations that will be written as `fw*`.
+
+Widening instruction encodings must follow the constraints in Section
+<<sec-vec-operands>>.
+
+[[sec-narrowing]]
+==== Narrowing Vector Arithmetic Instructions
+
+A few instructions are provided to convert double-width source vectors
+into single-width destination vectors. These instructions convert a
+vector register group specified by `vs2` with EEW/EMUL=2*SEW/2*LMUL to a vector register
+group with the current SEW/LMUL setting. Where there is a second
+source vector register group (specified by `vs1`), this has the same
+(narrower) width as the result (i.e., EEW=SEW).
+
+NOTE: An alternative design decision would have been to treat SEW/LMUL
+as defining the size of the source vector register group. The choice
+here is motivated by the belief that the chosen approach will require fewer
+`vtype` changes.
+
+NOTE: Compare operations that set a mask register are also
+implicitly a narrowing operation.
+
+A `vn*` prefix on the opcode is used to distinguish these instructions
+in the assembler, or a `vfn*` prefix for narrowing floating-point
+opcodes. The double-width source vector register group is signified
+by a `w` in the source operand suffix (e.g., `vnsra.wv`).
+
+----
+# Assembly syntax pattern for vector narrowing arithmetic instructions
+
+# Single-width result vd, double-width source vs2, single-width source vs1/rs1
+# SEW = 2*SEW op SEW
+vnop.wv vd, vs2, vs1, vm # integer vector-vector vd[i] = vs2[i] op vs1[i]
+vnop.wx vd, vs2, rs1, vm # integer vector-scalar vd[i] = vs2[i] op x[rs1]
+----
+
+Narrowing instruction encodings must follow the constraints in Section
+<<sec-vec-operands>>.
+
+[[sec-vector-integer]]
+=== Vector Integer Arithmetic Instructions
+
+A set of vector integer arithmetic instructions is provided. Unless
+otherwise stated, integer operations wrap around on overflow.
+
+==== Vector Single-Width Integer Add and Subtract
+
+Vector integer add and subtract are provided. Reverse-subtract
+instructions are also provided for the vector-scalar forms.
+
+----
+# Integer adds.
+vadd.vv vd, vs2, vs1, vm # Vector-vector
+vadd.vx vd, vs2, rs1, vm # vector-scalar
+vadd.vi vd, vs2, imm, vm # vector-immediate
+
+# Integer subtract
+vsub.vv vd, vs2, vs1, vm # Vector-vector
+vsub.vx vd, vs2, rs1, vm # vector-scalar
+
+# Integer reverse subtract
+vrsub.vx vd, vs2, rs1, vm # vd[i] = x[rs1] - vs2[i]
+vrsub.vi vd, vs2, imm, vm # vd[i] = imm - vs2[i]
+----
+
+NOTE: A vector of integer values can be negated using a
+reverse-subtract instruction with a scalar operand of `x0`. An
+assembly pseudoinstruction `vneg.v vd,vs` = `vrsub.vx vd,vs,x0` is provided.
+
+==== Vector Widening Integer Add/Subtract
+
+The widening add/subtract instructions are provided in both signed and
+unsigned variants, depending on whether the narrower source operands
+are first sign- or zero-extended before forming the double-width sum.
+
+----
+# Widening unsigned integer add/subtract, 2*SEW = SEW +/- SEW
+vwaddu.vv vd, vs2, vs1, vm # vector-vector
+vwaddu.vx vd, vs2, rs1, vm # vector-scalar
+vwsubu.vv vd, vs2, vs1, vm # vector-vector
+vwsubu.vx vd, vs2, rs1, vm # vector-scalar
+
+# Widening signed integer add/subtract, 2*SEW = SEW +/- SEW
+vwadd.vv vd, vs2, vs1, vm # vector-vector
+vwadd.vx vd, vs2, rs1, vm # vector-scalar
+vwsub.vv vd, vs2, vs1, vm # vector-vector
+vwsub.vx vd, vs2, rs1, vm # vector-scalar
+
+# Widening unsigned integer add/subtract, 2*SEW = 2*SEW +/- SEW
+vwaddu.wv vd, vs2, vs1, vm # vector-vector
+vwaddu.wx vd, vs2, rs1, vm # vector-scalar
+vwsubu.wv vd, vs2, vs1, vm # vector-vector
+vwsubu.wx vd, vs2, rs1, vm # vector-scalar
+
+# Widening signed integer add/subtract, 2*SEW = 2*SEW +/- SEW
+vwadd.wv vd, vs2, vs1, vm # vector-vector
+vwadd.wx vd, vs2, rs1, vm # vector-scalar
+vwsub.wv vd, vs2, vs1, vm # vector-vector
+vwsub.wx vd, vs2, rs1, vm # vector-scalar
+----
+
+NOTE: An integer value can be doubled in width using the widening add
+instructions with a scalar operand of `x0`. Assembly
+pseudoinstructions `vwcvt.x.x.v vd,vs,vm` = `vwadd.vx vd,vs,x0,vm` and
+`vwcvtu.x.x.v vd,vs,vm` = `vwaddu.vx vd,vs,x0,vm` are provided.
+
+==== Vector Integer Extension
+
+The vector integer extension instructions zero- or sign-extend a
+source vector integer operand with EEW less than SEW to fill SEW-sized
+elements in the destination. The EEW of the source is 1/2, 1/4, or
+1/8 of SEW, while EMUL of the source is (EEW/SEW)*LMUL. The
+destination has EEW equal to SEW and EMUL equal to LMUL.
+
+----
+vzext.vf2 vd, vs2, vm # Zero-extend SEW/2 source to SEW destination
+vsext.vf2 vd, vs2, vm # Sign-extend SEW/2 source to SEW destination
+vzext.vf4 vd, vs2, vm # Zero-extend SEW/4 source to SEW destination
+vsext.vf4 vd, vs2, vm # Sign-extend SEW/4 source to SEW destination
+vzext.vf8 vd, vs2, vm # Zero-extend SEW/8 source to SEW destination
+vsext.vf8 vd, vs2, vm # Sign-extend SEW/8 source to SEW destination
+----
+
+If the source EEW is not a supported width, or source EMUL would be
+below the minimum legal LMUL, the instruction encoding is reserved.
+
+NOTE: Standard vector load instructions access memory values that are
+the same size as the destination register elements. Some application
+code needs to operate on a range of operand widths in a wider element,
+for example, loading a byte from memory and adding to an eight-byte
+element. To avoid having to provide the cross-product of the number
+of vector load instructions by the number of data types (byte, word,
+halfword, and also signed/unsigned variants), we instead add explicit
+extension instructions that can be used if an appropriate widening
+arithmetic instruction is not available.
+
+==== Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+
+To support multi-word integer arithmetic, instructions that operate on
+a carry bit are provided. For each operation (add or subtract), two
+instructions are provided: one to provide the result (SEW width), and
+the second to generate the carry output (single bit encoded as a mask
+boolean).
+
+The carry inputs and outputs are represented using the mask register
+layout as described in Section <<sec-mask-register-layout>>. Due to
+encoding constraints, the carry input must come from the implicit `v0`
+register, but carry outputs can be written to any vector register that
+respects the source/destination overlap restrictions.
+
+`vadc` and `vsbc` add or subtract the source operands and the carry-in or
+borrow-in, and write the result to vector register `vd`.
+These instructions are encoded as masked instructions (`vm=0`), but they operate
+on and write back all body elements.
+Encodings corresponding to the unmasked versions (`vm=1`) are reserved.
+
+`vmadc` and `vmsbc` add or subtract the source operands, optionally
+add the carry-in or subtract the borrow-in if masked (`vm=0`), and
+write the result back to mask register `vd`. If unmasked (`vm=1`),
+there is no carry-in or borrow-in. These instructions operate on and
+write back all body elements, even if masked. Because these
+instructions produce a mask value, they always operate with a
+tail-agnostic policy.
+
+----
+ # Produce sum with carry.
+
+ # vd[i] = vs2[i] + vs1[i] + v0.mask[i]
+ vadc.vvm vd, vs2, vs1, v0 # Vector-vector
+
+ # vd[i] = vs2[i] + x[rs1] + v0.mask[i]
+ vadc.vxm vd, vs2, rs1, v0 # Vector-scalar
+
+ # vd[i] = vs2[i] + imm + v0.mask[i]
+ vadc.vim vd, vs2, imm, v0 # Vector-immediate
+
+ # Produce carry out in mask register format
+
+ # vd.mask[i] = carry_out(vs2[i] + vs1[i] + v0.mask[i])
+ vmadc.vvm vd, vs2, vs1, v0 # Vector-vector
+
+ # vd.mask[i] = carry_out(vs2[i] + x[rs1] + v0.mask[i])
+ vmadc.vxm vd, vs2, rs1, v0 # Vector-scalar
+
+ # vd.mask[i] = carry_out(vs2[i] + imm + v0.mask[i])
+ vmadc.vim vd, vs2, imm, v0 # Vector-immediate
+
+ # vd.mask[i] = carry_out(vs2[i] + vs1[i])
+ vmadc.vv vd, vs2, vs1 # Vector-vector, no carry-in
+
+ # vd.mask[i] = carry_out(vs2[i] + x[rs1])
+ vmadc.vx vd, vs2, rs1 # Vector-scalar, no carry-in
+
+ # vd.mask[i] = carry_out(vs2[i] + imm)
+ vmadc.vi vd, vs2, imm # Vector-immediate, no carry-in
+----
+
+Because implementing a carry propagation requires executing two
+instructions with unchanged inputs, destructive accumulations will
+require an additional move to obtain correct results.
+
+----
+ # Example multi-word arithmetic sequence, accumulating into v4
+ vmadc.vvm v1, v4, v8, v0 # Get carry into temp register v1
+ vadc.vvm v4, v4, v8, v0 # Calc new sum
+ vmmv.m v0, v1 # Move temp carry into v0 for next word
+----
+
+The subtract with borrow instruction `vsbc` performs the equivalent
+function to support long word arithmetic for subtraction. There are
+no subtract with immediate instructions.
+
+----
+ # Produce difference with borrow.
+
+ # vd[i] = vs2[i] - vs1[i] - v0.mask[i]
+ vsbc.vvm vd, vs2, vs1, v0 # Vector-vector
+
+ # vd[i] = vs2[i] - x[rs1] - v0.mask[i]
+ vsbc.vxm vd, vs2, rs1, v0 # Vector-scalar
+
+ # Produce borrow out in mask register format
+
+ # vd.mask[i] = borrow_out(vs2[i] - vs1[i] - v0.mask[i])
+ vmsbc.vvm vd, vs2, vs1, v0 # Vector-vector
+
+ # vd.mask[i] = borrow_out(vs2[i] - x[rs1] - v0.mask[i])
+ vmsbc.vxm vd, vs2, rs1, v0 # Vector-scalar
+
+ # vd.mask[i] = borrow_out(vs2[i] - vs1[i])
+ vmsbc.vv vd, vs2, vs1 # Vector-vector, no borrow-in
+
+ # vd.mask[i] = borrow_out(vs2[i] - x[rs1])
+ vmsbc.vx vd, vs2, rs1 # Vector-scalar, no borrow-in
+----
+
+For `vmsbc`, the borrow is defined to be 1 iff the difference, prior to
+truncation, is negative.
+
+For `vadc` and `vsbc`, the instruction encoding is reserved if the
+destination vector register is `v0`.
+
+NOTE: This constraint corresponds to the constraint on masked vector
+operations that overwrite the mask register.
+
+==== Vector Bitwise Logical Instructions
+
+----
+# Bitwise logical operations.
+vand.vv vd, vs2, vs1, vm # Vector-vector
+vand.vx vd, vs2, rs1, vm # vector-scalar
+vand.vi vd, vs2, imm, vm # vector-immediate
+
+vor.vv vd, vs2, vs1, vm # Vector-vector
+vor.vx vd, vs2, rs1, vm # vector-scalar
+vor.vi vd, vs2, imm, vm # vector-immediate
+
+vxor.vv vd, vs2, vs1, vm # Vector-vector
+vxor.vx vd, vs2, rs1, vm # vector-scalar
+vxor.vi vd, vs2, imm, vm # vector-immediate
+----
+
+NOTE: With an immediate of -1, vector-immediate forms of the `vxor`
+instruction provide a bitwise NOT operation. This is provided as
+an assembler pseudoinstruction `vnot.v vd,vs,vm` = `vxor.vi vd,vs,-1,vm`.
+
+==== Vector Single-Width Shift Instructions
+
+A full set of vector shift instructions is provided, including
+logical shift left (`sll`), and logical (zero-extending `srl`) and
+arithmetic (sign-extending `sra`) shift right. The data to be shifted
+is in the vector register group specified by `vs2` and the shift
+amount value can come from a vector register group `vs1`, a scalar
+integer register `rs1`, or a zero-extended 5-bit immediate. Only the low
+lg2(SEW) bits of the shift-amount value are used to control the shift
+amount.
+
+----
+# Bit shift operations
+vsll.vv vd, vs2, vs1, vm # Vector-vector
+vsll.vx vd, vs2, rs1, vm # vector-scalar
+vsll.vi vd, vs2, uimm, vm # vector-immediate
+
+vsrl.vv vd, vs2, vs1, vm # Vector-vector
+vsrl.vx vd, vs2, rs1, vm # vector-scalar
+vsrl.vi vd, vs2, uimm, vm # vector-immediate
+
+vsra.vv vd, vs2, vs1, vm # Vector-vector
+vsra.vx vd, vs2, rs1, vm # vector-scalar
+vsra.vi vd, vs2, uimm, vm # vector-immediate
+----
+
+==== Vector Narrowing Integer Right Shift Instructions
+
+The narrowing right shifts extract a smaller field from a wider
+operand and have both zero-extending (`srl`) and sign-extending
+(`sra`) forms. The shift amount can come from a vector register
+group, or a scalar `x` register, or a zero-extended 5-bit immediate.
+The low lg2(2*SEW) bits of the shift-amount value are
+used (e.g., the low 6 bits for a SEW=64-bit to SEW=32-bit narrowing
+operation).
+
+----
+ # Narrowing shift right logical, SEW = (2*SEW) >> SEW
+ vnsrl.wv vd, vs2, vs1, vm # vector-vector
+ vnsrl.wx vd, vs2, rs1, vm # vector-scalar
+ vnsrl.wi vd, vs2, uimm, vm # vector-immediate
+
+ # Narrowing shift right arithmetic, SEW = (2*SEW) >> SEW
+ vnsra.wv vd, vs2, vs1, vm # vector-vector
+ vnsra.wx vd, vs2, rs1, vm # vector-scalar
+ vnsra.wi vd, vs2, uimm, vm # vector-immediate
+----
+
+NOTE: Future extensions might add support for versions that narrow to
+a destination that is 1/4 the width of the source.
+
+NOTE: An integer value can be halved in width using the narrowing integer
+shift instructions with a scalar operand of `x0`. An assembly
+pseudoinstruction is provided `vncvt.x.x.w vd,vs,vm` = `vnsrl.wx vd,vs,x0,vm`.
+
+==== Vector Integer Compare Instructions
+
+The following integer compare instructions write 1 to the destination
+mask register element if the comparison evaluates to true, and 0
+otherwise. The destination mask vector is always held in a single
+vector register, with a layout of elements as described in Section
+<<sec-mask-register-layout>>. The destination mask vector register
+may be the same as the source vector mask register (`v0`).
+
+----
+# Set if equal
+vmseq.vv vd, vs2, vs1, vm # Vector-vector
+vmseq.vx vd, vs2, rs1, vm # vector-scalar
+vmseq.vi vd, vs2, imm, vm # vector-immediate
+
+# Set if not equal
+vmsne.vv vd, vs2, vs1, vm # Vector-vector
+vmsne.vx vd, vs2, rs1, vm # vector-scalar
+vmsne.vi vd, vs2, imm, vm # vector-immediate
+
+# Set if less than, unsigned
+vmsltu.vv vd, vs2, vs1, vm # Vector-vector
+vmsltu.vx vd, vs2, rs1, vm # Vector-scalar
+
+# Set if less than, signed
+vmslt.vv vd, vs2, vs1, vm # Vector-vector
+vmslt.vx vd, vs2, rs1, vm # vector-scalar
+
+# Set if less than or equal, unsigned
+vmsleu.vv vd, vs2, vs1, vm # Vector-vector
+vmsleu.vx vd, vs2, rs1, vm # vector-scalar
+vmsleu.vi vd, vs2, imm, vm # Vector-immediate
+
+# Set if less than or equal, signed
+vmsle.vv vd, vs2, vs1, vm # Vector-vector
+vmsle.vx vd, vs2, rs1, vm # vector-scalar
+vmsle.vi vd, vs2, imm, vm # vector-immediate
+
+# Set if greater than, unsigned
+vmsgtu.vx vd, vs2, rs1, vm # Vector-scalar
+vmsgtu.vi vd, vs2, imm, vm # Vector-immediate
+
+# Set if greater than, signed
+vmsgt.vx vd, vs2, rs1, vm # Vector-scalar
+vmsgt.vi vd, vs2, imm, vm # Vector-immediate
+
+# Following two instructions are not provided directly
+# Set if greater than or equal, unsigned
+# vmsgeu.vx vd, vs2, rs1, vm # Vector-scalar
+# Set if greater than or equal, signed
+# vmsge.vx vd, vs2, rs1, vm # Vector-scalar
+----
+
+The following table indicates how all comparisons are implemented in
+native machine code.
+
+----
+Comparison Assembler Mapping Assembler Pseudoinstruction
+
+va < vb vmslt{u}.vv vd, va, vb, vm
+va <= vb vmsle{u}.vv vd, va, vb, vm
+va > vb vmslt{u}.vv vd, vb, va, vm vmsgt{u}.vv vd, va, vb, vm
+va >= vb vmsle{u}.vv vd, vb, va, vm vmsge{u}.vv vd, va, vb, vm
+
+va < x vmslt{u}.vx vd, va, x, vm
+va <= x vmsle{u}.vx vd, va, x, vm
+va > x vmsgt{u}.vx vd, va, x, vm
+va >= x see below
+
+va < i vmsle{u}.vi vd, va, i-1, vm vmslt{u}.vi vd, va, i, vm
+va <= i vmsle{u}.vi vd, va, i, vm
+va > i vmsgt{u}.vi vd, va, i, vm
+va >= i vmsgt{u}.vi vd, va, i-1, vm vmsge{u}.vi vd, va, i, vm
+
+va, vb vector register groups
+x scalar integer register
+i immediate
+----
+
+NOTE: The immediate forms of `vmslt{u}.vi` are not provided as the
+immediate value can be decreased by 1 and the `vmsle{u}.vi` variants
+used instead. The `vmsle.vi` range is -16 to 15, resulting in an
+effective `vmslt.vi` range of -15 to 16. The `vmsleu.vi` range is 0
+to 15 giving an effective `vmsltu.vi` range of 1 to 16 (Note,
+`vmsltu.vi` with immediate 0 is not useful as it is always
+false).
+
+NOTE: Because the 5-bit vector immediates are always sign-extended,
+when the high bit of the `simm5` immediate is set, `vmsleu.vi` also
+supports unsigned immediate values in the range `2^SEW^-16` to
+`2^SEW^-1`, allowing corresponding `vmsltu.vi` compares against
+unsigned immediates in the range `2^SEW^-15` to `2^SEW^`. Note that
+`vmsltu.vi` with immediate `2^SEW^` is not useful as it is always
+true.
+
+Similarly, `vmsge{u}.vi` is not provided and the compare is
+implemented using `vmsgt{u}.vi` with the immediate decremented by one.
+The resulting effective `vmsge.vi` range is -15 to 16, and the
+resulting effective `vmsgeu.vi` range is 1 to 16 (Note, `vmsgeu.vi` with
+immediate 0 is not useful as it is always true).
+
+NOTE: The `vmsgt` forms for register scalar and immediates are provided
+to allow a single compare instruction to provide the correct
+polarity of mask value without using additional mask logical
+instructions.
+
+To reduce encoding space, the `vmsge{u}.vx` form is not directly
+provided, and so the `va {ge} x` case requires special treatment.
+
+NOTE: The `vmsge{u}.vx` could potentially be encoded in a
+non-orthogonal way under the unused OPIVI variant of `vmslt{u}`. These
+would be the only instructions in OPIVI that use a scalar `x` register
+however. Alternatively, a further two funct6 encodings could be used,
+but these would have a different operand format (writes to mask
+register) than others in the same group of 8 funct6 encodings. The
+current PoR is to omit these instructions and to synthesize where
+needed as described below.
+
+The `vmsge{u}.vx` operation can be synthesized by reducing the
+value of `x` by 1 and using the `vmsgt{u}.vx` instruction, when it is
+known that this will not underflow the representation in `x`.
+
+----
+Sequences to synthesize `vmsge{u}.vx` instruction
+
+va >= x, x > minimum
+
+ addi t0, x, -1; vmsgt{u}.vx vd, va, t0, vm
+----
+
+The above sequence will usually be the most efficient implementation,
+but assembler pseudoinstructions can be provided for cases where the
+range of `x` is unknown.
+
+----
+unmasked va >= x
+
+ pseudoinstruction: vmsge{u}.vx vd, va, x
+ expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
+
+masked va >= x, vd != v0
+
+ pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t
+ expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
+
+masked va >= x, vd == v0
+
+ pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
+ expansion: vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
+
+masked va >= x, any vd
+
+ pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
+ expansion: vmslt{u}.vx vt, va, x; vmandn.mm vt, v0, vt; vmandn.mm vd, vd, v0; vmor.mm vd, vt, vd
+
+ The vt argument to the pseudoinstruction must name a temporary vector register that is
+ not the same as vd and which will be clobbered by the pseudoinstruction
+----
+
+Compares effectively AND in the mask under a mask-undisturbed policy if the destination register is `v0`, e.g.,
+
+----
+ # (a < b) && (b < c) in two instructions when mask-undisturbed
+ vmslt.vv v0, va, vb # All body elements written
+ vmslt.vv v0, vb, vc, v0.t # Only update at set mask
+----
+
+Compares write mask registers, and so always operate under a
+tail-agnostic policy.
+
+==== Vector Integer Min/Max Instructions
+
+Signed and unsigned integer minimum and maximum instructions are
+supported.
+
+----
+# Unsigned minimum
+vminu.vv vd, vs2, vs1, vm # Vector-vector
+vminu.vx vd, vs2, rs1, vm # vector-scalar
+
+# Signed minimum
+vmin.vv vd, vs2, vs1, vm # Vector-vector
+vmin.vx vd, vs2, rs1, vm # vector-scalar
+
+# Unsigned maximum
+vmaxu.vv vd, vs2, vs1, vm # Vector-vector
+vmaxu.vx vd, vs2, rs1, vm # vector-scalar
+
+# Signed maximum
+vmax.vv vd, vs2, vs1, vm # Vector-vector
+vmax.vx vd, vs2, rs1, vm # vector-scalar
+----
+
+==== Vector Single-Width Integer Multiply Instructions
+
+The single-width multiply instructions perform a SEW-bit*SEW-bit
+multiply to generate a 2*SEW-bit product, then return one half of the
+product in the SEW-bit-wide destination. The `*mul*` versions write
+the low word of the product to the destination register, while the
+`*mulh*` versions write the high word of the product to the
+destination register.
+
+----
+# Signed multiply, returning low bits of product
+vmul.vv vd, vs2, vs1, vm # Vector-vector
+vmul.vx vd, vs2, rs1, vm # vector-scalar
+
+# Signed multiply, returning high bits of product
+vmulh.vv vd, vs2, vs1, vm # Vector-vector
+vmulh.vx vd, vs2, rs1, vm # vector-scalar
+
+# Unsigned multiply, returning high bits of product
+vmulhu.vv vd, vs2, vs1, vm # Vector-vector
+vmulhu.vx vd, vs2, rs1, vm # vector-scalar
+
+# Signed(vs2)-Unsigned multiply, returning high bits of product
+vmulhsu.vv vd, vs2, vs1, vm # Vector-vector
+vmulhsu.vx vd, vs2, rs1, vm # vector-scalar
+----
+
+NOTE: There is no `vmulhus.vx` opcode to return high half of
+unsigned-vector * signed-scalar product. The scalar can be splatted
+to a vector, then a `vmulhsu.vv` used.
+
+NOTE: The current `vmulh*` opcodes perform simple fractional
+multiplies, but with no option to scale, round, and/or saturate the
+result. A possible future extension can consider variants of `vmulh`,
+`vmulhu`, `vmulhsu` that use the `vxrm` rounding mode when discarding
+low half of product. There is no possibility of overflow in these
+cases.
+
+==== Vector Integer Divide Instructions
+
+The divide and remainder instructions are equivalent to the RISC-V
+standard scalar integer multiply/divides, with the same results for
+extreme inputs.
+
+----
+ # Unsigned divide.
+ vdivu.vv vd, vs2, vs1, vm # Vector-vector
+ vdivu.vx vd, vs2, rs1, vm # vector-scalar
+
+ # Signed divide
+ vdiv.vv vd, vs2, vs1, vm # Vector-vector
+ vdiv.vx vd, vs2, rs1, vm # vector-scalar
+
+ # Unsigned remainder
+ vremu.vv vd, vs2, vs1, vm # Vector-vector
+ vremu.vx vd, vs2, rs1, vm # vector-scalar
+
+ # Signed remainder
+ vrem.vv vd, vs2, vs1, vm # Vector-vector
+ vrem.vx vd, vs2, rs1, vm # vector-scalar
+----
+
+NOTE: The decision to include integer divide and remainder was
+contentious. The argument in favor is that without a standard
+instruction, software would have to pick some algorithm to perform the
+operation, which would likely perform poorly on some
+microarchitectures versus others.
+
+NOTE: There is no instruction to perform a "scalar divide by vector"
+operation.
+
+==== Vector Widening Integer Multiply Instructions
+
+The widening integer multiply instructions return the full 2*SEW-bit
+product from an SEW-bit*SEW-bit multiply.
+
+----
+# Widening signed-integer multiply
+vwmul.vv vd, vs2, vs1, vm # vector-vector
+vwmul.vx vd, vs2, rs1, vm # vector-scalar
+
+# Widening unsigned-integer multiply
+vwmulu.vv vd, vs2, vs1, vm # vector-vector
+vwmulu.vx vd, vs2, rs1, vm # vector-scalar
+
+# Widening signed(vs2)-unsigned integer multiply
+vwmulsu.vv vd, vs2, vs1, vm # vector-vector
+vwmulsu.vx vd, vs2, rs1, vm # vector-scalar
+----
+
+==== Vector Single-Width Integer Multiply-Add Instructions
+
+The integer multiply-add instructions are destructive and are provided
+in two forms, one that overwrites the addend or minuend
+(`vmacc`, `vnmsac`) and one that overwrites the first multiplicand
+(`vmadd`, `vnmsub`).
+
+The low half of the product is added to or subtracted from the third operand.
+
+NOTE: `sac` is intended to be read as "subtract from accumulator". The
+opcode is `vnmsac` to match the (unfortunately counterintuitive)
+floating-point `fnmsub` instruction definition. Similarly for the
+`vnmsub` opcode.
+
+----
+# Integer multiply-add, overwrite addend
+vmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i]
+vmacc.vx vd, rs1, vs2, vm # vd[i] = +(x[rs1] * vs2[i]) + vd[i]
+
+# Integer multiply-sub, overwrite minuend
+vnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i]
+vnmsac.vx vd, rs1, vs2, vm # vd[i] = -(x[rs1] * vs2[i]) + vd[i]
+
+# Integer multiply-add, overwrite multiplicand
+vmadd.vv vd, vs1, vs2, vm # vd[i] = (vs1[i] * vd[i]) + vs2[i]
+vmadd.vx vd, rs1, vs2, vm # vd[i] = (x[rs1] * vd[i]) + vs2[i]
+
+# Integer multiply-sub, overwrite multiplicand
+vnmsub.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) + vs2[i]
+vnmsub.vx vd, rs1, vs2, vm # vd[i] = -(x[rs1] * vd[i]) + vs2[i]
+----
+
+==== Vector Widening Integer Multiply-Add Instructions
+
+The widening integer multiply-add instructions add the full 2*SEW-bit
+product from a SEW-bit*SEW-bit multiply to a 2*SEW-bit value and
+produce a 2*SEW-bit result. All combinations of signed and unsigned
+multiply operands are supported.
+
+----
+# Widening unsigned-integer multiply-add, overwrite addend
+vwmaccu.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i]
+vwmaccu.vx vd, rs1, vs2, vm # vd[i] = +(x[rs1] * vs2[i]) + vd[i]
+
+# Widening signed-integer multiply-add, overwrite addend
+vwmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i]
+vwmacc.vx vd, rs1, vs2, vm # vd[i] = +(x[rs1] * vs2[i]) + vd[i]
+
+# Widening signed-unsigned-integer multiply-add, overwrite addend
+vwmaccsu.vv vd, vs1, vs2, vm # vd[i] = +(signed(vs1[i]) * unsigned(vs2[i])) + vd[i]
+vwmaccsu.vx vd, rs1, vs2, vm # vd[i] = +(signed(x[rs1]) * unsigned(vs2[i])) + vd[i]
+
+# Widening unsigned-signed-integer multiply-add, overwrite addend
+vwmaccus.vx vd, rs1, vs2, vm # vd[i] = +(unsigned(x[rs1]) * signed(vs2[i])) + vd[i]
+----
+
+==== Vector Integer Merge Instructions
+
+The vector integer merge instructions combine two source operands
+based on a mask. Unlike regular arithmetic instructions, the
+merge operates on all body elements (i.e., the set of elements from
+`vstart` up to the current vector length in `vl`).
+
+The `vmerge` instructions are encoded as masked instructions (`vm=0`).
+The instructions combine two
+sources as follows. At elements where the mask value is zero, the
+first operand is copied to the destination element, otherwise the
+second operand is copied to the destination element. The first
+operand is always a vector register group specified by `vs2`. The
+second operand is a vector register group specified by `vs1` or a
+scalar `x` register specified by `rs1` or a 5-bit sign-extended
+immediate.
+
+----
+vmerge.vvm vd, vs2, vs1, v0 # vd[i] = v0.mask[i] ? vs1[i] : vs2[i]
+vmerge.vxm vd, vs2, rs1, v0 # vd[i] = v0.mask[i] ? x[rs1] : vs2[i]
+vmerge.vim vd, vs2, imm, v0 # vd[i] = v0.mask[i] ? imm : vs2[i]
+----
+
+==== Vector Integer Move Instructions
+
+The vector integer move instructions copy a source operand to a vector
+register group.
+The `vmv.v.v` variant copies a vector register group, whereas the `vmv.v.x`
+and `vmv.v.i` variants __splat__ a scalar register or immediate to all active
+elements of the destination vector register group.
+These instructions are encoded as unmasked instructions (`vm=1`).
+The first operand specifier (`vs2`) must contain `v0`, and any other vector
+register number in `vs2` is _reserved_.
+
+----
+vmv.v.v vd, vs1 # vd[i] = vs1[i]
+vmv.v.x vd, rs1 # vd[i] = x[rs1]
+vmv.v.i vd, imm # vd[i] = imm
+----
+
+NOTE: Mask values can be widened into SEW-width elements using a
+sequence `vmv.v.i vd, 0; vmerge.vim vd, vd, 1, v0`.
+
+NOTE: The vector integer move instructions share the encoding with the vector
+merge instructions, but with `vm=1` and `vs2=v0`.
+
+The form `vmv.v.v vd, vd`, which leaves body elements unchanged,
+can be used to indicate that the register will next be used
+with an EEW equal to SEW.
+
+NOTE: Implementations that internally reorganize data according to EEW
+can shuffle the internal representation according to SEW.
+Implementations that do not internally reorganize data can dynamically
+elide this instruction and treat it as a NOP.
+
+NOTE: The `vmv.v.v vd, vd` instruction is not a RISC-V HINT as a
+tail-agnostic setting may cause an architectural state change on some
+implementations.
+
+[[sec-vector-fixed-point]]
+=== Vector Fixed-Point Arithmetic Instructions
+
+The preceding set of integer arithmetic instructions is extended to support
+fixed-point arithmetic.
+
+A fixed-point number is a two's-complement signed or unsigned integer
+interpreted as the numerator in a fraction with an implicit denominator.
+The fixed-point instructions are intended to be applied to the numerators;
+it is the responsibility of software to manage the denominators.
+An N-bit element can hold two's-complement signed integers in the
+range -2^N-1^...+2^N-1^-1, and unsigned integers in the range 0
+... +2^N^-1. The fixed-point instructions help preserve precision in
+narrow operands by supporting scaling and rounding, and can handle
+overflow by saturating results into the destination format range.
+
+NOTE: The widening integer operations described above can also be used
+to avoid overflow.
+
+==== Vector Single-Width Saturating Add and Subtract
+
+Saturating forms of integer add and subtract are provided, for both
+signed and unsigned integers. If the result would overflow the
+destination, the result is replaced with the closest representable
+value, and the `vxsat` bit is set.
+
+----
+# Saturating adds of unsigned integers.
+vsaddu.vv vd, vs2, vs1, vm # Vector-vector
+vsaddu.vx vd, vs2, rs1, vm # vector-scalar
+vsaddu.vi vd, vs2, imm, vm # vector-immediate
+
+# Saturating adds of signed integers.
+vsadd.vv vd, vs2, vs1, vm # Vector-vector
+vsadd.vx vd, vs2, rs1, vm # vector-scalar
+vsadd.vi vd, vs2, imm, vm # vector-immediate
+
+# Saturating subtract of unsigned integers.
+vssubu.vv vd, vs2, vs1, vm # Vector-vector
+vssubu.vx vd, vs2, rs1, vm # vector-scalar
+
+# Saturating subtract of signed integers.
+vssub.vv vd, vs2, vs1, vm # Vector-vector
+vssub.vx vd, vs2, rs1, vm # vector-scalar
+----
+
+==== Vector Single-Width Averaging Add and Subtract
+
+The averaging add and subtract instructions right shift the result by
+one bit and round off the result according to the setting in `vxrm`.
+Both unsigned and signed versions are provided.
+For `vaaddu` and `vaadd` there can be no overflow in the result.
+For `vasub` and `vasubu`, overflow is ignored and the result wraps around.
+
+NOTE: For `vasub`, overflow occurs only when subtracting the smallest number
+from the largest number under `rnu` or `rne` rounding.
+
+----
+# Averaging add
+
+# Averaging adds of unsigned integers.
+vaaddu.vv vd, vs2, vs1, vm # roundoff_unsigned(vs2[i] + vs1[i], 1)
+vaaddu.vx vd, vs2, rs1, vm # roundoff_unsigned(vs2[i] + x[rs1], 1)
+
+# Averaging adds of signed integers.
+vaadd.vv vd, vs2, vs1, vm # roundoff_signed(vs2[i] + vs1[i], 1)
+vaadd.vx vd, vs2, rs1, vm # roundoff_signed(vs2[i] + x[rs1], 1)
+
+# Averaging subtract
+
+# Averaging subtract of unsigned integers.
+vasubu.vv vd, vs2, vs1, vm # roundoff_unsigned(vs2[i] - vs1[i], 1)
+vasubu.vx vd, vs2, rs1, vm # roundoff_unsigned(vs2[i] - x[rs1], 1)
+
+# Averaging subtract of signed integers.
+vasub.vv vd, vs2, vs1, vm # roundoff_signed(vs2[i] - vs1[i], 1)
+vasub.vx vd, vs2, rs1, vm # roundoff_signed(vs2[i] - x[rs1], 1)
+----
+
+==== Vector Single-Width Fractional Multiply with Rounding and Saturation
+
+The signed fractional multiply instruction produces a 2*SEW product of
+the two SEW inputs, then shifts the result right by SEW-1 bits,
+rounding these bits according to `vxrm`, then saturates the result to
+fit into SEW bits. If the result causes saturation, the `vxsat` bit
+is set.
+
+----
+# Signed saturating and rounding fractional multiply
+# See vxrm description for rounding calculation
+vsmul.vv vd, vs2, vs1, vm # vd[i] = clip(roundoff_signed(vs2[i]*vs1[i], SEW-1))
+vsmul.vx vd, vs2, rs1, vm # vd[i] = clip(roundoff_signed(vs2[i]*x[rs1], SEW-1))
+----
+
+NOTE: When multiplying two N-bit signed numbers, the largest magnitude
+is obtained for -2^N-1^ * -2^N-1^ producing a result +2^2N-2^, which
+has a single (zero) sign bit when held in 2N bits. All other products
+have two sign bits in 2N bits. To retain greater precision in N
+result bits, the product is shifted right by one bit less than N,
+saturating the largest magnitude result but increasing result
+precision by one bit for all other products.
+
+NOTE: We do not provide an equivalent fractional multiply where one
+input is unsigned, as these would retain all upper SEW bits and would
+not need to saturate. This operation is partly covered by the
+`vmulhu` and `vmulhsu` instructions, for the case where rounding is
+simply truncation (`rdn`).
+
+==== Vector Single-Width Scaling Shift Instructions
+
+These instructions shift the input value right, and round off the
+shifted out bits according to `vxrm`. The scaling right shifts have
+both zero-extending (`vssrl`) and sign-extending (`vssra`) forms. The
+data to be shifted is in the vector register group specified by `vs2`
+and the shift amount value can come from a vector register group
+`vs1`, a scalar integer register `rs1`, or a zero-extended 5-bit
+immediate. Only the low lg2(SEW) bits of the shift-amount value are
+used to control the shift amount.
+
+----
+ # Scaling shift right logical
+ vssrl.vv vd, vs2, vs1, vm # vd[i] = roundoff_unsigned(vs2[i], vs1[i])
+ vssrl.vx vd, vs2, rs1, vm # vd[i] = roundoff_unsigned(vs2[i], x[rs1])
+ vssrl.vi vd, vs2, uimm, vm # vd[i] = roundoff_unsigned(vs2[i], uimm)
+
+ # Scaling shift right arithmetic
+ vssra.vv vd, vs2, vs1, vm # vd[i] = roundoff_signed(vs2[i],vs1[i])
+ vssra.vx vd, vs2, rs1, vm # vd[i] = roundoff_signed(vs2[i], x[rs1])
+ vssra.vi vd, vs2, uimm, vm # vd[i] = roundoff_signed(vs2[i], uimm)
+----
+
+==== Vector Narrowing Fixed-Point Clip Instructions
+
+The `vnclip` instructions are used to pack a fixed-point value into a
+narrower destination. The instructions support rounding, scaling, and
+saturation into the final destination format. The source data is in
+the vector register group specified by `vs2`. The scaling shift amount
+value can come from a vector register group `vs1`, a scalar integer
+register `rs1`, or a zero-extended 5-bit immediate. The low
+lg2(2*SEW) bits of the vector or scalar shift-amount value (e.g., the
+low 6 bits for a SEW=64-bit to SEW=32-bit narrowing operation) are
+used to control the right shift amount, which provides the scaling.
+
+----
+# Narrowing unsigned clip
+# SEW 2*SEW SEW
+ vnclipu.wv vd, vs2, vs1, vm # vd[i] = clip(roundoff_unsigned(vs2[i], vs1[i]))
+ vnclipu.wx vd, vs2, rs1, vm # vd[i] = clip(roundoff_unsigned(vs2[i], x[rs1]))
+ vnclipu.wi vd, vs2, uimm, vm # vd[i] = clip(roundoff_unsigned(vs2[i], uimm))
+
+# Narrowing signed clip
+ vnclip.wv vd, vs2, vs1, vm # vd[i] = clip(roundoff_signed(vs2[i], vs1[i]))
+ vnclip.wx vd, vs2, rs1, vm # vd[i] = clip(roundoff_signed(vs2[i], x[rs1]))
+ vnclip.wi vd, vs2, uimm, vm # vd[i] = clip(roundoff_signed(vs2[i], uimm))
+----
+
+For `vnclipu`/`vnclip`, the rounding mode is specified in the `vxrm`
+CSR. Rounding occurs around the least-significant bit of the
+destination and before saturation.
+
+For `vnclipu`, the shifted rounded source value is treated as an
+unsigned integer and saturates if the result would overflow the
+destination viewed as an unsigned integer.
+
+NOTE: There is no single instruction that can saturate a signed value
+into an unsigned destination. A sequence of two vector instructions
+that first removes negative numbers by performing a max against 0
+using `vmax` then clips the resulting unsigned value into the
+destination using `vnclipu` can be used if setting `vxsat` value for
+negative numbers is not required. A `vsetvli` is required in between
+these two instructions to change SEW.
+
+For `vnclip`, the shifted rounded source value is treated as a signed
+integer and saturates if the result would overflow the destination viewed
+as a signed integer.
+
+If any destination element is saturated, the `vxsat` bit is set in the
+`vxsat` register.
+
+[[sec-vector-float]]
+=== Vector Floating-Point Instructions
+
+The standard vector floating-point instructions treat elements as
+IEEE-754/2008-compatible values. If the EEW of a vector
+floating-point operand does not correspond to a supported IEEE
+floating-point type, the instruction encoding is reserved.
+
+NOTE: Whether floating-point is supported, and for which element
+widths, is determined by the specific vector extension. The current
+set of extensions includes support for 32-bit and 64-bit floating-point
+values. When 16-bit and 128-bit element widths are added, they will
+also be treated as IEEE-754/2008-compatible values. Other
+floating-point formats may be supported in future extensions.
+
+Vector floating-point instructions require the presence of base scalar
+floating-point extensions corresponding to the supported vector
+floating-point element widths.
+
+NOTE: In particular, future vector extensions supporting 16-bit
+half-precision floating-point values will also require some scalar
+half-precision floating-point support.
+
+If the floating-point unit status field `mstatus.FS` is `Off` then any
+attempt to execute a vector floating-point instruction will raise an
+illegal instruction exception. Any vector floating-point instruction
+that modifies any floating-point extension state (i.e., floating-point
+CSRs or `f` registers) must set `mstatus.FS` to `Dirty`.
+
+If the hypervisor extension is implemented and V=1, the `vsstatus.FS` field is
+additionally in effect for vector floating-point instructions. If
+`vsstatus.FS` or `mstatus.FS` is `Off` then any
+attempt to execute a vector floating-point instruction will raise an
+illegal instruction exception. Any vector floating-point instruction
+that modifies any floating-point extension state (i.e., floating-point
+CSRs or `f` registers) must set both `mstatus.FS` and `vsstatus.FS` to `Dirty`.
+
+The vector floating-point instructions have the same behavior as the
+scalar floating-point instructions with regard to NaNs.
+
+Scalar values for floating-point vector-scalar operations are sourced
+as described in Section <<sec-arithmetic-encoding>>.
+
+==== Vector Floating-Point Exception Flags
+
+A vector floating-point exception at any active floating-point element
+sets the standard FP exception flags in the `fflags` register. Inactive
+elements do not set FP exception flags.
+
+==== Vector Single-Width Floating-Point Add/Subtract Instructions
+
+----
+ # Floating-point add
+ vfadd.vv vd, vs2, vs1, vm # Vector-vector
+ vfadd.vf vd, vs2, rs1, vm # vector-scalar
+
+ # Floating-point subtract
+ vfsub.vv vd, vs2, vs1, vm # Vector-vector
+ vfsub.vf vd, vs2, rs1, vm # Vector-scalar vd[i] = vs2[i] - f[rs1]
+ vfrsub.vf vd, vs2, rs1, vm # Scalar-vector vd[i] = f[rs1] - vs2[i]
+----
+
+==== Vector Widening Floating-Point Add/Subtract Instructions
+
+----
+# Widening FP add/subtract, 2*SEW = SEW +/- SEW
+vfwadd.vv vd, vs2, vs1, vm # vector-vector
+vfwadd.vf vd, vs2, rs1, vm # vector-scalar
+vfwsub.vv vd, vs2, vs1, vm # vector-vector
+vfwsub.vf vd, vs2, rs1, vm # vector-scalar
+
+# Widening FP add/subtract, 2*SEW = 2*SEW +/- SEW
+vfwadd.wv vd, vs2, vs1, vm # vector-vector
+vfwadd.wf vd, vs2, rs1, vm # vector-scalar
+vfwsub.wv vd, vs2, vs1, vm # vector-vector
+vfwsub.wf vd, vs2, rs1, vm # vector-scalar
+----
+
+==== Vector Single-Width Floating-Point Multiply/Divide Instructions
+
+----
+ # Floating-point multiply
+ vfmul.vv vd, vs2, vs1, vm # Vector-vector
+ vfmul.vf vd, vs2, rs1, vm # vector-scalar
+
+ # Floating-point divide
+ vfdiv.vv vd, vs2, vs1, vm # Vector-vector
+ vfdiv.vf vd, vs2, rs1, vm # vector-scalar
+
+ # Reverse floating-point divide vector = scalar / vector
+ vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i]
+----
+
+==== Vector Widening Floating-Point Multiply
+
+----
+# Widening floating-point multiply
+vfwmul.vv vd, vs2, vs1, vm # vector-vector
+vfwmul.vf vd, vs2, rs1, vm # vector-scalar
+----
+
+==== Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+
+All four varieties of fused multiply-add are provided, and in two
+destructive forms that overwrite one of the operands, either the
+addend or the first multiplicand.
+
+----
+# FP multiply-accumulate, overwrites addend
+vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i]
+vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i]
+
+# FP negate-(multiply-accumulate), overwrites subtrahend
+vfnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i]
+vfnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i]
+
+# FP multiply-subtract-accumulator, overwrites subtrahend
+vfmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i]
+vfmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i]
+
+# FP negate-(multiply-subtract-accumulator), overwrites minuend
+vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i]
+vfnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i]
+
+# FP multiply-add, overwrites multiplicand
+vfmadd.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) + vs2[i]
+vfmadd.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) + vs2[i]
+
+# FP negate-(multiply-add), overwrites multiplicand
+vfnmadd.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) - vs2[i]
+vfnmadd.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) - vs2[i]
+
+# FP multiply-sub, overwrites multiplicand
+vfmsub.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) - vs2[i]
+vfmsub.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) - vs2[i]
+
+# FP negate-(multiply-sub), overwrites multiplicand
+vfnmsub.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) + vs2[i]
+vfnmsub.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) + vs2[i]
+----
+
+NOTE: While we considered using the two unused rounding modes
+in the scalar FP FMA encoding to provide a few non-destructive FMAs,
+these would complicate microarchitectures by being the only maskable
+operation with three inputs and separate output.
+
+==== Vector Widening Floating-Point Fused Multiply-Add Instructions
+
+The widening floating-point fused multiply-add instructions all
+overwrite the wide addend with the result. The multiplier inputs are
+all SEW wide, while the addend and destination are 2*SEW bits wide.
+
+----
+# FP widening multiply-accumulate, overwrites addend
+vfwmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i]
+vfwmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i]
+
+# FP widening negate-(multiply-accumulate), overwrites addend
+vfwnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i]
+vfwnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i]
+
+# FP widening multiply-subtract-accumulator, overwrites addend
+vfwmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i]
+vfwmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i]
+
+# FP widening negate-(multiply-subtract-accumulator), overwrites addend
+vfwnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i]
+vfwnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i]
+----
+
+==== Vector Floating-Point Square-Root Instruction
+
+This is a unary vector-vector instruction.
+
+----
+ # Floating-point square root
+ vfsqrt.v vd, vs2, vm # Vector-vector square root
+----
+
+==== Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+
+----
+ # Floating-point reciprocal square-root estimate to 7 bits.
+ vfrsqrt7.v vd, vs2, vm
+----
+
+This is a unary vector-vector instruction that returns an estimate of
+1/sqrt(x) accurate to 7 bits.
+
+NOTE: An earlier draft version had used the assembler name `vfrsqrte7`
+but this was deemed to cause confusion with the ``e``__x__ notation for element
+width. The earlier name can be retained as an alias in tool chains for
+backward compatibility.
+
+The following table describes the instruction's behavior for all
+classes of floating-point inputs:
+
+[cols="1,1,1"]
+[%autowidth,float="center",align="center",options="header"]
+|===
+| Input | Output | Exceptions raised
+
+| -{inf} {le} _x_ < -0.0 | canonical NaN | NV
+| -0.0 | -{inf} | DZ
+| +0.0 | +{inf} | DZ
+| +0.0 < _x_ < +{inf} | _estimate of 1/sqrt(x)_ |
+| +{inf} | +0.0 |
+| qNaN | canonical NaN |
+| sNaN | canonical NaN | NV
+|===
+
+NOTE: All positive normal and subnormal inputs produce normal outputs.
+
+NOTE: The output value is independent of the dynamic rounding mode.
+
+For the non-exceptional cases, the low bit of the exponent and the six high
+bits of significand (after the leading one) are concatenated and used to
+address the following table.
+The output of the table becomes the seven high bits of the result significand
+(after the leading one); the remainder of the result significand is zero.
+Subnormal inputs are normalized and the exponent adjusted appropriately before
+the lookup.
+The output exponent is chosen to make the result approximate the reciprocal of
+the square root of the argument.
+
+More precisely, the result is computed as follows.
+Let the normalized input exponent be equal to the input exponent if the input
+is normal, or 0 minus the number of leading zeros in the significand
+otherwise.
+If the input is subnormal, the normalized input significand is given by
+shifting the input significand left by 1 minus the normalized input exponent,
+discarding the leading 1 bit.
+The output exponent equals floor((3*B - 1 - the normalized input exponent) / 2),
+where B is the exponent bias. The output sign equals the input sign.
+
+The following table gives the seven MSBs of the output significand as a
+function of the LSB of the normalized input exponent and the six MSBs of the
+normalized input significand; the other bits of the output significand are zero.
+
+include::images/wavedrom/vfrsqrt7.adoc[]
+
+NOTE: For example, when SEW=32, vfrsqrt7(0x00718abc ({approx} 1.043e-38)) = 0x5f080000 ({approx} 9.800e18), and vfrsqrt7(0x7f765432 ({approx} 3.274e38)) = 0x1f820000 ({approx} 5.506e-20).
+
+NOTE: The 7 bit accuracy was chosen as it requires 0,1,2,3
+Newton-Raphson iterations to converge to close to bfloat16, FP16,
+FP32, FP64 accuracy respectively. Future instructions can be defined
+with greater estimate accuracy.
+
+==== Vector Floating-Point Reciprocal Estimate Instruction
+
+----
+ # Floating-point reciprocal estimate to 7 bits.
+ vfrec7.v vd, vs2, vm
+----
+
+NOTE: An earlier draft version had used the assembler name `vfrece7`
+but this was deemed to cause confusion with the ``e``__x__ notation for element
+width. The earlier name can be retained as an alias in tool chains for
+backward compatibility.
+
+This is a unary vector-vector instruction that returns an estimate of
+1/x accurate to 7 bits.
+
+The following table describes the instruction's behavior for all
+classes of floating-point inputs, where _B_ is the exponent bias:
+
+[cols="1,1,1,1"]
+[%autowidth,float="center",align="center",options="header"]
+|===
+| Input (_x_) | Rounding Mode | Output (_y_ {approx} _1/x_) | Exceptions raised
+
+| -{inf} | _any_ | -0.0 |
+| -2^B+1^ < _x_ {le} -2^B^ (normal) | _any_ | -2^-(B+1)^ {ge} _y_ > -2^-B^ (subnormal, sig=01...) |
+| -2^B^ < _x_ {le} -2^B-1^ (normal) | _any_ | -2^-B^ {ge} _y_ > -2^-B+1^ (subnormal, sig=1...) |
+| -2^B-1^ < _x_ {le} -2^-B+1^ (normal) | _any_ | -2^-B+1^ {ge} _y_ > -2^B-1^ (normal) |
+| -2^-B+1^ < _x_ {le} -2^-B^ (subnormal, sig=1...) | _any_ | -2^B-1^ {ge} _y_ > -2^B^ (normal) |
+| -2^-B^ < _x_ {le} -2^-(B+1)^ (subnormal, sig=01...) | _any_ | -2^B^ {ge} _y_ > -2^B+1^ (normal) |
+| -2^-(B+1)^ < _x_ < -0.0 (subnormal, sig=00...) | RUP, RTZ | greatest-mag. negative finite value | NX, OF
+| -2^-(B+1)^ < _x_ < -0.0 (subnormal, sig=00...) | RDN, RNE, RMM | -{inf} | NX, OF
+| -0.0 | _any_ | -{inf} | DZ
+| +0.0 | _any_ | +{inf} | DZ
+| +0.0 < _x_ < 2^-(B+1)^ (subnormal, sig=00...) | RUP, RNE, RMM | +{inf} | NX, OF
+| +0.0 < _x_ < 2^-(B+1)^ (subnormal, sig=00...) | RDN, RTZ | greatest finite value | NX, OF
+| 2^-(B+1)^ {le} _x_ < 2^-B^ (subnormal, sig=01...) | _any_ | 2^B+1^ > _y_ {ge} 2^B^ (normal) |
+| 2^-B^ {le} _x_ < 2^-B+1^ (subnormal, sig=1...) | _any_ | 2^B^ > _y_ {ge} 2^B-1^ (normal) |
+| 2^-B+1^ {le} _x_ < 2^B-1^ (normal) | _any_ | 2^B-1^ > _y_ {ge} 2^-B+1^ (normal) |
+| 2^B-1^ {le} _x_ < 2^B^ (normal) | _any_ | 2^-B+1^ > _y_ {ge} 2^-B^ (subnormal, sig=1...) |
+| 2^B^ {le} _x_ < 2^B+1^ (normal) | _any_ | 2^-B^ > _y_ {ge} 2^-(B+1)^ (subnormal, sig=01...) |
+| +{inf} | _any_ | +0.0 |
+| qNaN | _any_ | canonical NaN |
+| sNaN | _any_ | canonical NaN | NV
+|===
+
+NOTE: Subnormal inputs with magnitude at least 2^-(B+1)^ produce normal outputs;
+other subnormal inputs produce infinite outputs.
+Normal inputs with magnitude at least 2^B-1^ produce subnormal outputs;
+other normal inputs produce normal outputs.
+
+NOTE: The output value depends on the dynamic rounding mode when
+the overflow exception is raised.
+
+For the non-exceptional cases, the seven high bits of significand (after the
+leading one) are used to address the following table.
+The output of the table becomes the seven high bits of the result significand
+(after the leading one); the remainder of the result significand is zero.
+Subnormal inputs are normalized and the exponent adjusted appropriately before
+the lookup.
+The output exponent is chosen to make the result approximate the reciprocal of
+the argument, and subnormal outputs are denormalized accordingly.
+
+More precisely, the result is computed as follows.
+Let the normalized input exponent be equal to the input exponent if the input
+is normal, or 0 minus the number of leading zeros in the significand
+otherwise.
+The normalized output exponent equals (2*B - 1 - the normalized input exponent).
+If the normalized output exponent is outside the range [-1, 2*B], the result
+corresponds to one of the exceptional cases in the table above.
+
+If the input is subnormal, the normalized input significand is given by
+shifting the input significand left by 1 minus the normalized input exponent,
+discarding the leading 1 bit.
+Otherwise, the normalized input significand equals the input significand.
+The following table gives the seven MSBs of the normalized output significand
+as a function of the seven MSBs of the normalized input significand; the other
+bits of the normalized output significand are zero.
+
+include::images/wavedrom/vfrec7.adoc[]
+
+If the normalized output exponent is 0 or -1, the result is subnormal: the
+output exponent is 0, and the output significand is given by concatenating
+a 1 bit to the left of the normalized output significand, then shifting that
+quantity right by 1 minus the normalized output exponent.
+Otherwise, the output exponent equals the normalized output exponent, and the
+output significand equals the normalized output significand.
+The output sign equals the input sign.
+
+NOTE: For example, when SEW=32, vfrec7(0x00718abc ({approx} 1.043e-38)) = 0x7e900000 ({approx} 9.570e37), and vfrec7(0x7f765432 ({approx} 3.274e38)) = 0x00214000 ({approx} 3.053e-39).
+
+NOTE: The 7 bit accuracy was chosen as it requires 0,1,2,3
+Newton-Raphson iterations to converge to close to bfloat16, FP16,
+FP32, FP64 accuracy respectively. Future instructions can be defined
+with greater estimate accuracy.
+
+==== Vector Floating-Point MIN/MAX Instructions
+
+The vector floating-point `vfmin` and `vfmax` instructions have the
+same behavior as the corresponding scalar floating-point instructions
+in version 2.2 of the RISC-V F/D/Q extension: they perform the `minimumNumber`
+or `maximumNumber` operation on active elements.
+
+----
+ # Floating-point minimum
+ vfmin.vv vd, vs2, vs1, vm # Vector-vector
+ vfmin.vf vd, vs2, rs1, vm # vector-scalar
+
+ # Floating-point maximum
+ vfmax.vv vd, vs2, vs1, vm # Vector-vector
+ vfmax.vf vd, vs2, rs1, vm # vector-scalar
+----
+
+==== Vector Floating-Point Sign-Injection Instructions
+
+Vector versions of the scalar sign-injection instructions. The result
+takes all bits except the sign bit from the vector `vs2` operands.
+
+----
+ vfsgnj.vv vd, vs2, vs1, vm # Vector-vector
+ vfsgnj.vf vd, vs2, rs1, vm # vector-scalar
+
+ vfsgnjn.vv vd, vs2, vs1, vm # Vector-vector
+ vfsgnjn.vf vd, vs2, rs1, vm # vector-scalar
+
+ vfsgnjx.vv vd, vs2, vs1, vm # Vector-vector
+ vfsgnjx.vf vd, vs2, rs1, vm # vector-scalar
+----
+
+NOTE: A vector of floating-point values can be negated using a
+sign-injection instruction with both source operands set to the same
+vector operand. An assembly pseudoinstruction is provided: `vfneg.v vd,vs` = `vfsgnjn.vv vd,vs,vs`.
+
+NOTE: The absolute value of a vector of floating-point elements can be
+calculated using a sign-injection instruction with both source
+operands set to the same vector operand. An assembly
+pseudoinstruction is provided: `vfabs.v vd,vs` = `vfsgnjx.vv vd,vs,vs`.
+
+==== Vector Floating-Point Compare Instructions
+
+These vector FP compare instructions compare two source operands and
+write the comparison result to a mask register. The destination mask
+vector is always held in a single vector register, with a layout of
+elements as described in Section <<sec-mask-register-layout>>. The
+destination mask vector register may be the same as the source vector
+mask register (`v0`). Compares write mask registers, and so always
+operate under a tail-agnostic policy.
+
+The compare instructions follow the semantics of the scalar
+floating-point compare instructions. `vmfeq` and `vmfne` raise the invalid
+operation exception only on signaling NaN inputs. `vmflt`, `vmfle`, `vmfgt`,
+and `vmfge` raise the invalid operation exception on both signaling and
+quiet NaN inputs.
+`vmfne` writes 1 to the destination element when either
+operand is NaN, whereas the other compares write 0 when either operand
+is NaN.
+
+----
+ # Compare equal
+ vmfeq.vv vd, vs2, vs1, vm # Vector-vector
+ vmfeq.vf vd, vs2, rs1, vm # vector-scalar
+
+ # Compare not equal
+ vmfne.vv vd, vs2, vs1, vm # Vector-vector
+ vmfne.vf vd, vs2, rs1, vm # vector-scalar
+
+ # Compare less than
+ vmflt.vv vd, vs2, vs1, vm # Vector-vector
+ vmflt.vf vd, vs2, rs1, vm # vector-scalar
+
+ # Compare less than or equal
+ vmfle.vv vd, vs2, vs1, vm # Vector-vector
+ vmfle.vf vd, vs2, rs1, vm # vector-scalar
+
+ # Compare greater than
+ vmfgt.vf vd, vs2, rs1, vm # vector-scalar
+
+ # Compare greater than or equal
+ vmfge.vf vd, vs2, rs1, vm # vector-scalar
+----
+
+----
+Comparison Assembler Mapping Assembler pseudoinstruction
+
+va < vb vmflt.vv vd, va, vb, vm
+va <= vb vmfle.vv vd, va, vb, vm
+va > vb vmflt.vv vd, vb, va, vm vmfgt.vv vd, va, vb, vm
+va >= vb vmfle.vv vd, vb, va, vm vmfge.vv vd, va, vb, vm
+
+va < f vmflt.vf vd, va, f, vm
+va <= f vmfle.vf vd, va, f, vm
+va > f vmfgt.vf vd, va, f, vm
+va >= f vmfge.vf vd, va, f, vm
+
+va, vb vector register groups
+f scalar floating-point register
+----
+
+NOTE: Providing all forms is necessary to correctly handle unordered
+compares for NaNs.
+
+NOTE: C99 floating-point quiet compares can be implemented by masking
+the signaling compares when either input is NaN, as follows. When
+the comparand is a non-NaN constant, the middle two instructions can be
+omitted.
+
+----
+ # Example of implementing isgreater()
+ vmfeq.vv v0, va, va # Only set where A is not NaN.
+ vmfeq.vv v1, vb, vb # Only set where B is not NaN.
+ vmand.mm v0, v0, v1 # Only set where A and B are ordered,
+ vmfgt.vv v0, va, vb, v0.t # so only set flags on ordered values.
+----
+
+NOTE: In the above sequence, it is tempting to mask the second `vmfeq`
+instruction and remove the `vmand` instruction, but this more efficient
+sequence incorrectly fails to raise the invalid exception when an
+element of `va` contains a quiet NaN and the corresponding element in
+`vb` contains a signaling NaN.
+
+==== Vector Floating-Point Classify Instruction
+
+This is a unary vector-vector instruction that operates in the same
+way as the scalar classify instruction.
+
+----
+ vfclass.v vd, vs2, vm # Vector-vector
+----
+
+The 10-bit mask produced by this instruction is placed in the
+least-significant bits of the result elements. The upper (SEW-10)
+bits of the result are filled with zeros. The instruction is only
+defined for SEW=16b and above, so the result will always fit in the
+destination elements.
+
+==== Vector Floating-Point Merge Instruction
+
+A vector-scalar floating-point merge instruction is provided, which
+operates on all body elements from `vstart` up to the current vector
+length in `vl` regardless of mask value.
+
+The `vfmerge.vfm` instruction is encoded as a masked instruction (`vm=0`).
+At elements where the mask value is zero, the first vector operand is
+copied to the destination element, otherwise a scalar floating-point
+register value is copied to the destination element.
+
+----
+vfmerge.vfm vd, vs2, rs1, v0 # vd[i] = v0.mask[i] ? f[rs1] : vs2[i]
+----
+
+[[sec-vector-float-move]]
+==== Vector Floating-Point Move Instruction
+
+The vector floating-point move instruction __splats__ a floating-point
+scalar operand to a vector register group. The instruction copies a
+scalar `f` register value to all active elements of a vector register
+group. This instruction is encoded as an unmasked instruction (`vm=1`).
+The instruction must have the `vs2` field set to `v0`, with all other
+values for `vs2` reserved.
+
+----
+vfmv.v.f vd, rs1 # vd[i] = f[rs1]
+----
+
+NOTE: The `vfmv.v.f` instruction shares the encoding with the `vfmerge.vfm`
+instruction, but with `vm=1` and `vs2=v0`.
+
+==== Single-Width Floating-Point/Integer Type-Convert Instructions
+
+Conversion operations are provided to convert to and from
+floating-point values and unsigned and signed integers, where both
+source and destination are SEW wide.
+
+----
+vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer.
+vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer.
+
+vfcvt.rtz.xu.f.v vd, vs2, vm # Convert float to unsigned integer, truncating.
+vfcvt.rtz.x.f.v vd, vs2, vm # Convert float to signed integer, truncating.
+
+vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float.
+vfcvt.f.x.v vd, vs2, vm # Convert signed integer to float.
+----
+
+The conversions follow the same rules on exceptional conditions as the
+scalar conversion instructions.
+The conversions use the dynamic rounding mode in `frm`, except for the `rtz`
+variants, which round towards zero.
+
+NOTE: The `rtz` variants are provided to accelerate truncating conversions
+from floating-point to integer, as is common in languages like C and Java.
+
+==== Widening Floating-Point/Integer Type-Convert Instructions
+
+A set of conversion instructions is provided to convert narrower
+integer and floating-point datatypes to a type of twice the
+width.
+
+----
+vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
+vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer.
+
+vfwcvt.rtz.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer, truncating.
+vfwcvt.rtz.x.f.v vd, vs2, vm # Convert float to double-width signed integer, truncating.
+
+vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
+vfwcvt.f.x.v vd, vs2, vm # Convert signed integer to double-width float.
+
+vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
+----
+
+These instructions have the same constraints on vector register overlap
+as other widening instructions (see <<sec-widening>>).
+
+NOTE: A double-width IEEE floating-point value can always represent a
+single-width integer exactly.
+
+NOTE: A double-width IEEE floating-point value can always represent a
+single-width IEEE floating-point value exactly.
+
+NOTE: A full set of floating-point widening conversions is not
+supported as single instructions, but any widening conversion can be
+implemented as several doubling steps with equivalent results and no
+additional exception flags raised.
+
+==== Narrowing Floating-Point/Integer Type-Convert Instructions
+
+A set of conversion instructions is provided to convert wider integer
+and floating-point datatypes to a type of half the width.
+
+----
+vfncvt.xu.f.w vd, vs2, vm # Convert double-width float to unsigned integer.
+vfncvt.x.f.w vd, vs2, vm # Convert double-width float to signed integer.
+
+vfncvt.rtz.xu.f.w vd, vs2, vm # Convert double-width float to unsigned integer, truncating.
+vfncvt.rtz.x.f.w vd, vs2, vm # Convert double-width float to signed integer, truncating.
+
+vfncvt.f.xu.w vd, vs2, vm # Convert double-width unsigned integer to float.
+vfncvt.f.x.w vd, vs2, vm # Convert double-width signed integer to float.
+
+vfncvt.f.f.w vd, vs2, vm # Convert double-width float to single-width float.
+vfncvt.rod.f.f.w vd, vs2, vm # Convert double-width float to single-width float,
+ # rounding towards odd.
+----
+
+These instructions have the same constraints on vector register overlap
+as other narrowing instructions (see <<sec-narrowing>>).
+
+NOTE: A full set of floating-point narrowing conversions is not
+supported as single instructions. Conversions can be implemented in
+a sequence of halving steps. Results are equivalently rounded and
+the same exception flags are raised if all but the last halving step
+use round-towards-odd (`vfncvt.rod.f.f.w`). Only the final step
+should use the desired rounding mode.
+
+NOTE: For `vfncvt.rod.f.f.w`, a finite value that exceeds the range of the
+destination format is converted to the destination format's largest finite value with the same sign.
+
+=== Vector Reduction Operations
+
+Vector reduction operations take a vector register group of elements
+and a scalar held in element 0 of a vector register, and perform a
+reduction using some binary operator, to produce a scalar result in
+element 0 of a vector register. The scalar input and output operands
+are held in element 0 of a single vector register, not a vector
+register group, so any vector register can be the scalar source or
+destination of a vector reduction regardless of LMUL setting.
+
+The destination vector register can overlap the source operands,
+including the mask register.
+
+NOTE: Vector reductions read and write the scalar operand and result
+into element 0 of a vector register instead of a scalar register to
+avoid a loss of decoupling with the scalar processor, and to support
+future polymorphic use with future types not supported in the scalar
+unit.
+
+Inactive elements from the source vector register group are excluded
+from the reduction, but the scalar operand is always included
+regardless of the mask values.
+
+The other elements in the destination vector register ( 0 < index <
+VLEN/SEW) are considered the tail and are managed with the current
+tail agnostic/undisturbed policy.
+
+If `vl`=0, no operation is performed and the destination register is
+not updated.
+
+NOTE: This choice of behavior for `vl`=0 reduces implementation
+complexity as it is consistent with other operations on vector
+register state. For the common case that the source and destination
+scalar operand are the same vector register, this behavior also
+produces the expected result. For the uncommon case that the source
+and destination scalar operand are in different vector registers, this
+instruction will not copy the source into the destination when `vl`=0.
+However, it is expected that in most of these cases it will be
+statically known that `vl` is not zero. In other cases, a check for
+`vl`=0 will have to be added to ensure that the source scalar is
+copied to the destination (e.g., by explicitly setting `vl`=1 and
+performing a register-register copy).
+
+Traps on vector reduction instructions are always reported with a
+`vstart` of 0. Vector reduction operations raise an illegal
+instruction exception if `vstart` is non-zero.
+
+The assembler syntax for a reduction operation is `vredop.vs`, where
+the `.vs` suffix denotes the first operand is a vector register group
+and the second operand is a scalar stored in element 0 of a vector
+register.
+
+[[sec-vector-integer-reduce]]
+==== Vector Single-Width Integer Reduction Instructions
+
+All operands and results of single-width reduction instructions have
+the same SEW width. Overflows wrap around on arithmetic sums.
+
+----
+ # Simple reductions, where [*] denotes all active elements:
+ vredsum.vs vd, vs2, vs1, vm # vd[0] = sum( vs1[0] , vs2[*] )
+ vredmaxu.vs vd, vs2, vs1, vm # vd[0] = maxu( vs1[0] , vs2[*] )
+ vredmax.vs vd, vs2, vs1, vm # vd[0] = max( vs1[0] , vs2[*] )
+ vredminu.vs vd, vs2, vs1, vm # vd[0] = minu( vs1[0] , vs2[*] )
+ vredmin.vs vd, vs2, vs1, vm # vd[0] = min( vs1[0] , vs2[*] )
+ vredand.vs vd, vs2, vs1, vm # vd[0] = and( vs1[0] , vs2[*] )
+ vredor.vs vd, vs2, vs1, vm # vd[0] = or( vs1[0] , vs2[*] )
+ vredxor.vs vd, vs2, vs1, vm # vd[0] = xor( vs1[0] , vs2[*] )
+----
+
+[[sec-vector-integer-reduce-widen]]
+==== Vector Widening Integer Reduction Instructions
+
+The unsigned `vwredsumu.vs` instruction zero-extends the SEW-wide
+vector elements before summing them, then adds the 2*SEW-width scalar
+element, and stores the result in a 2*SEW-width scalar element.
+
+The `vwredsum.vs` instruction sign-extends the SEW-wide vector
+elements before summing them.
+
+For both `vwredsumu.vs` and `vwredsum.vs`, overflows wrap around.
+
+----
+ # Unsigned sum reduction into double-width accumulator
+ vwredsumu.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(zero-extend(SEW))
+
+ # Signed sum reduction into double-width accumulator
+ vwredsum.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(sign-extend(SEW))
+----
+
+[[sec-vector-float-reduce]]
+==== Vector Single-Width Floating-Point Reduction Instructions
+
+----
+ # Simple reductions.
+ vfredosum.vs vd, vs2, vs1, vm # Ordered sum
+ vfredusum.vs vd, vs2, vs1, vm # Unordered sum
+ vfredmax.vs vd, vs2, vs1, vm # Maximum value
+ vfredmin.vs vd, vs2, vs1, vm # Minimum value
+
+----
+
+NOTE: The older assembler mnemonic `vfredsum` is retained as an alias for `vfredusum`.
+
+===== Vector Ordered Single-Width Floating-Point Sum Reduction
+
+The `vfredosum` instruction must sum the floating-point values in
+element order, starting with the scalar in `vs1[0]`--that is, it
+performs the computation:
+
+----
+ vd[0] = (((vs1[0] + vs2[0]) + vs2[1]) + ...) + vs2[vl-1]
+----
+where each addition operates identically to the scalar floating-point
+instructions in terms of raising exception flags and generating or
+propagating special values.
+
+NOTE: The ordered reduction supports compiler autovectorization, while
+the unordered FP sum allows for faster implementations.
+
+When the operation is masked (`vm=0`), the masked-off elements do not
+affect the result or the exception flags.
+
+NOTE: If no elements are active, no additions are performed, so the scalar in
+`vs1[0]` is simply copied to the destination register, without canonicalizing
+NaN values and without setting any exception flags. This behavior preserves
+the handling of NaNs, exceptions, and rounding when autovectorizing a scalar
+summation loop.
+
+===== Vector Unordered Single-Width Floating-Point Sum Reduction
+
+The unordered sum reduction instruction, `vfredusum`, provides an
+implementation more freedom in performing the reduction.
+
+The implementation must produce a result equivalent to a reduction tree
+composed of binary operator nodes, with the inputs being elements from
+the source vector register group (`vs2`) and the source scalar value
+(`vs1[0]`). Each operator in the tree accepts two inputs and produces
+one result.
+Each operator first computes an exact sum as a RISC-V scalar floating-point
+addition with infinite exponent range and precision, then converts this exact
+sum to a floating-point format with range and precision each at least as great
+as the element floating-point format indicated by SEW, rounding using the
+currently active floating-point dynamic rounding mode and raising exception
+flags as necessary.
+A different floating-point range and precision may be chosen for the result of
+each operator.
+A node where one input is derived only from elements masked-off or beyond the
+active vector length may either treat that input as the additive identity of the
+appropriate EEW or simply copy the other input to its output.
+The rounded result from the root node in the tree is converted (rounded again,
+using the dynamic rounding mode) to the standard floating-point format
+indicated by SEW.
+An implementation
+is allowed to add an additional additive identity to the final result.
+
+The additive identity is +0.0 when rounding down (towards -{inf}) or
+-0.0 for all other rounding modes.
+
+The reduction tree structure must be deterministic for a given value
+in `vtype` and `vl`.
+
+NOTE: As a consequence of this definition, implementations need not propagate
+NaN payloads through the reduction tree when no elements are active. In
+particular, if no elements are active and the scalar input is NaN,
+implementations are permitted to canonicalize the NaN and, if the NaN is
+signaling, set the invalid exception flag. Implementations are alternatively
+permitted to pass through the original NaN and set no exception flags, as with
+`vfredosum`.
+
+NOTE: The `vfredosum` instruction is a valid implementation of the
+`vfredusum` instruction.
+
+===== Vector Single-Width Floating-Point Max and Min Reductions
+
+The `vfredmin` and `vfredmax` instructions reduce the scalar argument in
+`vs1[0]` and active elements in `vs2` using the `minimumNumber` and
+`maximumNumber` operations, respectively.
+
+NOTE: Floating-point max and min reductions should return the same
+final value and raise the same exception flags regardless of operation
+order.
+
+NOTE: If no elements are active, the scalar in `vs1[0]` is simply copied to
+the destination register, without canonicalizing NaN values and without
+setting any exception flags.
+
+[[sec-vector-float-reduce-widen]]
+==== Vector Widening Floating-Point Reduction Instructions
+
+Widening forms of the sum reductions are provided that
+read and write a double-width reduction result.
+
+----
+ # Simple reductions.
+ vfwredosum.vs vd, vs2, vs1, vm # Ordered sum
+ vfwredusum.vs vd, vs2, vs1, vm # Unordered sum
+----
+
+NOTE: The older assembler mnemonic `vfwredsum` is retained as an alias for `vfwredusum`.
+
+The reduction of the SEW-width elements is performed as in the
+single-width reduction case, with the elements in `vs2` promoted
+to 2*SEW bits before adding to the 2*SEW-bit accumulator.
+
+NOTE: `vfwredosum.vs` handles inactive elements and NaN payloads analogously
+to `vfredosum.vs`; `vfwredusum.vs` does so analogously to `vfredusum.vs`.
+
+[[sec-vector-mask]]
+=== Vector Mask Instructions
+
+Several instructions are provided to help operate on mask values held in
+a vector register.
+
+[[sec-mask-register-logical]]
+==== Vector Mask-Register Logical Instructions
+
+Vector mask-register logical operations operate on mask registers.
+Each element in a mask register is a single bit, so these instructions
+all operate on single vector registers regardless of the setting of
+the `vlmul` field in `vtype`. They do not change the value of
+`vlmul`. The destination vector register may be the same as either
+source vector register.
+
+As with other vector instructions, the elements with indices less than
+`vstart` are unchanged, and `vstart` is reset to zero after execution.
+Vector mask logical instructions are always unmasked, so there are no
+inactive elements, and the encodings with `vm=0` are reserved.
+Mask elements past `vl`, the tail elements, are
+always updated with a tail-agnostic policy.
+
+----
+ vmand.mm vd, vs2, vs1 # vd.mask[i] = vs2.mask[i] && vs1.mask[i]
+ vmnand.mm vd, vs2, vs1 # vd.mask[i] = !(vs2.mask[i] && vs1.mask[i])
+ vmandn.mm vd, vs2, vs1 # vd.mask[i] = vs2.mask[i] && !vs1.mask[i]
+ vmxor.mm vd, vs2, vs1 # vd.mask[i] = vs2.mask[i] ^^ vs1.mask[i]
+ vmor.mm vd, vs2, vs1 # vd.mask[i] = vs2.mask[i] || vs1.mask[i]
+ vmnor.mm vd, vs2, vs1 # vd.mask[i] = !(vs2.mask[i] || vs1.mask[i])
+ vmorn.mm vd, vs2, vs1 # vd.mask[i] = vs2.mask[i] || !vs1.mask[i]
+ vmxnor.mm vd, vs2, vs1 # vd.mask[i] = !(vs2.mask[i] ^^ vs1.mask[i])
+----
+
+NOTE: The previous assembler mnemonics `vmandnot` and `vmornot` have
+been changed to `vmandn` and `vmorn` to be consistent with the
+equivalent scalar instructions. The old `vmandnot` and `vmornot`
+mnemonics can be retained as assembler aliases for compatibility.
+
+Several assembler pseudoinstructions are defined as shorthand for
+common uses of mask logical operations:
+----
+ vmmv.m vd, vs => vmand.mm vd, vs, vs # Copy mask register
+ vmclr.m vd => vmxor.mm vd, vd, vd # Clear mask register
+ vmset.m vd => vmxnor.mm vd, vd, vd # Set mask register
+ vmnot.m vd, vs => vmnand.mm vd, vs, vs # Invert bits
+----
+
+NOTE: The `vmmv.m` instruction was previously called `vmcpy.m`, but
+with the new layout it is more consistent to name it as a "mv" because bits
+are copied without interpretation. The `vmcpy.m` assembler
+pseudoinstruction can be retained for compatibility. For
+implementations that internally rearrange bits according to EEW, a
+`vmmv.m` instruction with the same source and destination can be used as
+an idiom to force an internal reformat into a mask vector.
+
+The set of eight mask logical instructions can generate any of the 16
+possible binary logical functions of the two input masks:
+
+[cols="1,1,1,1,12"]
+|===
+4+| inputs |
+
+| 0 | 0 | 1 | 1 | src1
+| 0 | 1 | 0 | 1 | src2
+|===
+
+[cols="1,1,1,1,6,6"]
+|===
+4+| output | instruction | pseudoinstruction
+
+| 0 | 0 | 0 | 0 | vmxor.mm vd, vd, vd | vmclr.m vd
+| 1 | 0 | 0 | 0 | vmnor.mm vd, src1, src2 |
+| 0 | 1 | 0 | 0 | vmandn.mm vd, src2, src1 |
+| 1 | 1 | 0 | 0 | vmnand.mm vd, src1, src1 | vmnot.m vd, src1
+| 0 | 0 | 1 | 0 | vmandn.mm vd, src1, src2 |
+| 1 | 0 | 1 | 0 | vmnand.mm vd, src2, src2 | vmnot.m vd, src2
+| 0 | 1 | 1 | 0 | vmxor.mm vd, src1, src2 |
+| 1 | 1 | 1 | 0 | vmnand.mm vd, src1, src2 |
+| 0 | 0 | 0 | 1 | vmand.mm vd, src1, src2 |
+| 1 | 0 | 0 | 1 | vmxnor.mm vd, src1, src2 |
+| 0 | 1 | 0 | 1 | vmand.mm vd, src2, src2 | vmmv.m vd, src2
+| 1 | 1 | 0 | 1 | vmorn.mm vd, src2, src1 |
+| 0 | 0 | 1 | 1 | vmand.mm vd, src1, src1 | vmmv.m vd, src1
+| 1 | 0 | 1 | 1 | vmorn.mm vd, src1, src2 |
+| 0 | 1 | 1 | 1 | vmor.mm vd, src1, src2 |
+| 1 | 1 | 1 | 1 | vmxnor.mm vd, vd, vd | vmset.m vd
+|===
+
+NOTE: The vector mask logical instructions are designed to be easily
+fused with a following masked vector operation to effectively expand
+the number of predicate registers by moving values into `v0` before
+use.
+
+
+==== Vector count population in mask `vcpop.m`
+
+----
+ vcpop.m rd, vs2, vm
+----
+
+NOTE: This instruction previously had the assembler mnemonic `vpopc.m`
+but was renamed to be consistent with the scalar instruction. The
+assembler instruction alias `vpopc.m` is being retained for software
+compatibility.
+
+The source operand is a single vector register holding mask register
+values as described in Section <<sec-mask-register-layout>>.
+
+The `vcpop.m` instruction counts the number of mask elements of the
+active elements of the vector source mask register that have the value
+1 and writes the result to a scalar `x` register.
+
+The operation can be performed under a mask, in which case only the
+active elements (those whose mask bit in `v0` is set) are counted.
+
+----
+ vcpop.m rd, vs2, v0.t # x[rd] = sum_i ( vs2.mask[i] && v0.mask[i] )
+----
+
+The `vcpop.m` instruction writes `x[rd]` even if `vl`=0 (with the
+value 0, since no mask elements are active).
+
+Traps on `vcpop.m` are always reported with a `vstart` of 0. The
+`vcpop.m` instruction will raise an illegal instruction exception if
+`vstart` is non-zero.
+
+==== `vfirst` find-first-set mask bit
+
+----
+ vfirst.m rd, vs2, vm
+----
+
+The `vfirst` instruction finds the lowest-numbered active element of
+the source mask vector that has the value 1 and writes that element's
+index to a GPR. If no active element has the value 1, -1 is written
+to the GPR.
+
+NOTE: Software can assume that any negative value (highest bit set)
+corresponds to no element found, as vector lengths will never reach
+2^(XLEN-1)^ on any implementation.
+
+The `vfirst.m` instruction writes `x[rd]` even if `vl`=0 (with the
+value -1, since no mask elements are active).
+
+Traps on `vfirst` are always reported with a `vstart` of 0. The
+`vfirst` instruction will raise an illegal instruction exception if
+`vstart` is non-zero.
+
+==== `vmsbf.m` set-before-first mask bit
+
+----
+ vmsbf.m vd, vs2, vm
+
+ # Example
+
+ 7 6 5 4 3 2 1 0 Element number
+
+ 1 0 0 1 0 1 0 0 v3 contents
+ vmsbf.m v2, v3
+ 0 0 0 0 0 0 1 1 v2 contents
+
+ 1 0 0 1 0 1 0 1 v3 contents
+ vmsbf.m v2, v3
+ 0 0 0 0 0 0 0 0 v2
+
+ 0 0 0 0 0 0 0 0 v3 contents
+ vmsbf.m v2, v3
+ 1 1 1 1 1 1 1 1 v2
+
+ 1 1 0 0 0 0 1 1 v0 contents
+ 1 0 0 1 0 1 0 0 v3 contents
+ vmsbf.m v2, v3, v0.t
+ 0 1 x x x x 1 1 v2 contents
+----
+
+The `vmsbf.m` instruction takes a mask register as input and writes
+results to a mask register. The instruction writes a 1 to all active
+mask elements before the first active source element that is a 1, then
+writes a 0 to that element and all following active elements. If
+there is no set bit in the active elements of the source vector, then
+all active elements in the destination are written with a 1.
+
+The tail elements in the destination mask register are updated under a
+tail-agnostic policy.
+
+Traps on `vmsbf.m` are always reported with a `vstart` of 0. The
+`vmsbf` instruction will raise an illegal instruction exception if
+`vstart` is non-zero.
+
+The destination register cannot overlap the source register
+and, if masked, cannot overlap the mask register (`v0`).
+
+==== `vmsif.m` set-including-first mask bit
+
+The vector mask set-including-first instruction is similar to
+set-before-first, except it also includes the element with a set bit.
+
+----
+ vmsif.m vd, vs2, vm
+
+ # Example
+
+ 7 6 5 4 3 2 1 0 Element number
+
+ 1 0 0 1 0 1 0 0 v3 contents
+ vmsif.m v2, v3
+ 0 0 0 0 0 1 1 1 v2 contents
+
+ 1 0 0 1 0 1 0 1 v3 contents
+ vmsif.m v2, v3
+ 0 0 0 0 0 0 0 1 v2
+
+ 1 1 0 0 0 0 1 1 v0 contents
+ 1 0 0 1 0 1 0 0 v3 contents
+ vmsif.m v2, v3, v0.t
+ 1 1 x x x x 1 1 v2 contents
+----
+
+The tail elements in the destination mask register are updated under a
+tail-agnostic policy.
+
+Traps on `vmsif.m` are always reported with a `vstart` of 0. The
+`vmsif` instruction will raise an illegal instruction exception if
+`vstart` is non-zero.
+
+The destination register cannot overlap the source register
+and, if masked, cannot overlap the mask register (`v0`).
+
+==== `vmsof.m` set-only-first mask bit
+
+The vector mask set-only-first instruction is similar to
+set-before-first, except it only sets the first element with a bit
+set, if any.
+
+----
+ vmsof.m vd, vs2, vm
+
+ # Example
+
+ 7 6 5 4 3 2 1 0 Element number
+
+ 1 0 0 1 0 1 0 0 v3 contents
+ vmsof.m v2, v3
+ 0 0 0 0 0 1 0 0 v2 contents
+
+ 1 0 0 1 0 1 0 1 v3 contents
+ vmsof.m v2, v3
+ 0 0 0 0 0 0 0 1 v2
+
+ 1 1 0 0 0 0 1 1 v0 contents
+ 1 1 0 1 0 1 0 0 v3 contents
+ vmsof.m v2, v3, v0.t
+ 0 1 x x x x 0 0 v2 contents
+----
+
+The tail elements in the destination mask register are updated under a
+tail-agnostic policy.
+
+Traps on `vmsof.m` are always reported with a `vstart` of 0. The
+`vmsof` instruction will raise an illegal instruction exception if
+`vstart` is non-zero.
+
+The destination register cannot overlap the source register
+and, if masked, cannot overlap the mask register (`v0`).
+
+==== Example using vector mask instructions
+
+The following is an example of vectorizing a data-dependent exit loop.
+
+----
+include::example/strcpy.s[lines=4..-1]
+----
+----
+include::example/strncpy.s[lines=4..-1]
+----
+
+==== Vector Iota Instruction
+
+The `viota.m` instruction reads a source vector mask register and
+writes to each element of the destination vector register group the
+sum of all the bits of elements in the mask register
+whose index is less than the element's index, i.e., a parallel prefix sum of
+the mask values.
+
+This instruction can be masked, in which case only the enabled
+elements contribute to the sum.
+
+----
+ viota.m vd, vs2, vm
+
+ # Example
+
+ 7 6 5 4 3 2 1 0 Element number
+
+ 1 0 0 1 0 0 0 1 v2 contents
+ viota.m v4, v2 # Unmasked
+ 2 2 2 1 1 1 1 0 v4 result
+
+ 1 1 1 0 1 0 1 1 v0 contents
+ 1 0 0 1 0 0 0 1 v2 contents
+ 2 3 4 5 6 7 8 9 v4 contents
+ viota.m v4, v2, v0.t # Masked, vtype.vma=0
+ 1 1 1 5 1 7 1 0 v4 results
+----
+
+The result value is zero-extended to fill the destination element if
+SEW is wider than the result. If the result value would overflow the
+destination SEW, the least-significant SEW bits are retained.
+
+Traps on `viota.m` are always reported with a `vstart` of 0, and
+execution is always restarted from the beginning when resuming after a
+trap handler. An illegal instruction exception is raised if `vstart`
+is non-zero.
+
+The destination register group cannot overlap the source register
+and, if masked, cannot overlap the mask register (`v0`).
+
+The `viota.m` instruction can be combined with memory scatter
+instructions (indexed stores) to perform vector compress functions.
+
+----
+ # Compact non-zero elements from input memory array to output memory array
+ #
+ # size_t compact_non_zero(size_t n, const int* in, int* out)
+ # {
+ # size_t i;
+ # size_t count = 0;
+ # int *p = out;
+ #
+ # for (i=0; i<n; i++)
+ # {
+ # const int v = *in++;
+ # if (v != 0)
+ # *p++ = v;
+ # }
+ #
+ # return (size_t) (p - out);
+ # }
+ #
+ # a0 = n
+ # a1 = &in
+ # a2 = &out
+
+compact_non_zero:
+ li a6, 0 # Clear count of non-zero elements
+loop:
+ vsetvli a5, a0, e32, m8, ta, ma # 32-bit integers
+ vle32.v v8, (a1) # Load input vector
+ sub a0, a0, a5 # Decrement number done
+ slli a5, a5, 2 # Multiply by four bytes
+ vmsne.vi v0, v8, 0 # Locate non-zero values
+ add a1, a1, a5 # Bump input pointer
+ vcpop.m a5, v0 # Count number of elements set in v0
+ viota.m v16, v0 # Get destination offsets of active elements
+ add a6, a6, a5 # Accumulate number of elements
+ vsll.vi v16, v16, 2, v0.t # Multiply offsets by four bytes
+ slli a5, a5, 2 # Multiply number of non-zero elements by four bytes
+ vsuxei32.v v8, (a2), v16, v0.t # Scatter using scaled viota results under mask
+ add a2, a2, a5 # Bump output pointer
+ bnez a0, loop # Any more?
+
+ mv a0, a6 # Return count
+ ret
+----
+
+==== Vector Element Index Instruction
+
+The `vid.v` instruction writes each element's index to the
+destination vector register group, from 0 to `vl`-1.
+
+----
+ vid.v vd, vm # Write element ID to destination.
+----
+
+The instruction can be masked. Masking does not change the
+index value written to active elements.
+
+The `vs2` field of the instruction must be set to `v0`, otherwise the
+encoding is _reserved_.
+
+The result value is zero-extended to fill the destination element if
+SEW is wider than the result. If the result value would overflow the
+destination SEW, the least-significant SEW bits are retained.
+
+NOTE: Microarchitectures can implement the `vid.v` instruction using the
+same datapath as `viota.m` but with an implicitly set mask source.
+
+[[sec-vector-permute]]
+=== Vector Permutation Instructions
+
+A range of permutation instructions are provided to move elements
+around within the vector registers.
+
+==== Integer Scalar Move Instructions
+
+The integer scalar read/write instructions transfer a single
+value between a scalar `x` register and element 0 of a vector
+register. The instructions ignore LMUL and vector register groups.
+
+----
+vmv.x.s rd, vs2 # x[rd] = vs2[0] (vs1=0)
+vmv.s.x vd, rs1 # vd[0] = x[rs1] (vs2=0)
+----
+
+The `vmv.x.s` instruction copies a single SEW-wide element from index 0 of the
+source vector register to a destination integer register. If SEW > XLEN, the
+least-significant XLEN bits are transferred and the upper SEW-XLEN bits are
+ignored. If SEW < XLEN, the value is sign-extended to XLEN bits.
+
+NOTE: `vmv.x.s` performs its operation even if `vstart` {ge} `vl` or `vl`=0.
+
+The `vmv.s.x` instruction copies the scalar integer register to element 0 of
+the destination vector register. If SEW < XLEN, the least-significant bits
+are copied and the upper XLEN-SEW bits are ignored. If SEW > XLEN, the value
+is sign-extended to SEW bits. The other elements in the destination vector
+register ( 0 < index < VLEN/SEW) are treated as tail elements using the current tail agnostic/undisturbed policy. If `vstart` {ge} `vl`, no
+operation is performed and the destination register is not updated.
+
+NOTE: As a consequence, when `vl`=0, no elements are updated in the
+destination vector register group, regardless of `vstart`.
+
+The encodings corresponding to the masked versions (`vm=0`) of `vmv.x.s`
+and `vmv.s.x` are reserved.
+
+==== Floating-Point Scalar Move Instructions
+
+The floating-point scalar read/write instructions transfer a single
+value between a scalar `f` register and element 0 of a vector
+register. The instructions ignore LMUL and vector register groups.
+
+----
+vfmv.f.s rd, vs2 # f[rd] = vs2[0] (rs1=0)
+vfmv.s.f vd, rs1 # vd[0] = f[rs1] (vs2=0)
+----
+
+The `vfmv.f.s` instruction copies a single SEW-wide element from index
+0 of the source vector register to a destination scalar floating-point
+register.
+
+NOTE: `vfmv.f.s` performs its operation even if `vstart` {ge} `vl` or `vl`=0.
+
+The `vfmv.s.f` instruction copies the scalar floating-point register
+to element 0 of the destination vector register. The other elements
+in the destination vector register ( 0 < index < VLEN/SEW) are treated
+as tail elements using the current tail agnostic/undisturbed policy.
+If `vstart` {ge} `vl`, no operation is performed and the destination
+register is not updated.
+
+NOTE: As a consequence, when `vl`=0, no elements are updated in the
+destination vector register group, regardless of `vstart`.
+
+The encodings corresponding to the masked versions (`vm=0`) of `vfmv.f.s`
+and `vfmv.s.f` are reserved.
+
+==== Vector Slide Instructions
+
+The slide instructions move elements up and down a vector register
+group.
+
+NOTE: The slide operations can be implemented much more efficiently
+than using the arbitrary register gather instruction. Implementations
+may optimize certain OFFSET values for `vslideup` and `vslidedown`.
+In particular, power-of-2 offsets may operate substantially faster
+than other offsets.
+
+For all of the `vslideup`, `vslidedown`, `v[f]slide1up`, and
+`v[f]slide1down` instructions, if `vstart` {ge} `vl`, the instruction performs no
+operation and leaves the destination vector register unchanged.
+
+NOTE: As a consequence, when `vl`=0, no elements are updated in the
+destination vector register group, regardless of `vstart`.
+
+The tail agnostic/undisturbed policy is followed for tail elements.
+
+The slide instructions may be masked, with mask element _i_
+controlling whether _destination_ element _i_ is written. The mask
+undisturbed/agnostic policy is followed for inactive elements.
+
+===== Vector Slideup Instructions
+
+----
+ vslideup.vx vd, vs2, rs1, vm # vd[i+x[rs1]] = vs2[i]
+ vslideup.vi vd, vs2, uimm, vm # vd[i+uimm] = vs2[i]
+----
+
+For `vslideup`, the value in `vl` specifies the maximum number of destination
+elements that are written. The start index (_OFFSET_) for the
+destination can be either specified using an unsigned integer in the
+`x` register specified by `rs1`, or a 5-bit immediate, zero-extended to XLEN bits.
+If XLEN > SEW, _OFFSET_ is _not_ truncated to SEW bits.
+Destination elements _OFFSET_ through `vl`-1 are written if unmasked and
+if _OFFSET_ < `vl`.
+
+----
+ vslideup behavior for destination elements (`vstart` < `vl`)
+
+ OFFSET is amount to slideup, either from x register or a 5-bit immediate
+
+ 0 <= i < min(vl, max(vstart, OFFSET)) Unchanged
+ max(vstart, OFFSET) <= i < vl vd[i] = vs2[i-OFFSET] if v0.mask[i] enabled
+ vl <= i < VLMAX Follow tail policy
+----
+
+The destination vector register group for `vslideup` cannot overlap
+the source vector register group, otherwise the instruction encoding
+is reserved.
+
+NOTE: The non-overlap constraint avoids WAR hazards on the
+input vectors during execution, and enables restart with non-zero
+`vstart`.
+
+===== Vector Slidedown Instructions
+
+----
+ vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+x[rs1]]
+ vslidedown.vi vd, vs2, uimm, vm # vd[i] = vs2[i+uimm]
+----
+
+For `vslidedown`, the value in `vl` specifies the maximum number of
+destination elements that are written. The remaining elements past
+`vl` are handled according to the current tail policy (Section
+<<sec-agnostic>>).
+
+The start index (_OFFSET_) for the source can be either specified
+using an unsigned integer in the `x` register specified by `rs1`, or a
+5-bit immediate, zero-extended to XLEN bits.
+If XLEN > SEW, _OFFSET_ is _not_ truncated to SEW bits.
+
+----
+ vslidedown behavior for source elements for element i in slide (`vstart` < `vl`)
+ 0 <= i+OFFSET < VLMAX src[i] = vs2[i+OFFSET]
+ VLMAX <= i+OFFSET src[i] = 0
+
+ vslidedown behavior for destination element i in slide (`vstart` < `vl`)
+ 0 <= i < vstart Unchanged
+ vstart <= i < vl vd[i] = src[i] if v0.mask[i] enabled
+ vl <= i < VLMAX Follow tail policy
+
+----
+
+===== Vector Slide1up
+
+Variants of slide are provided that only move by one element but which
+also allow a scalar integer value to be inserted at the vacated
+element position.
+
+----
+ vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i]
+----
+
+The `vslide1up` instruction places the `x` register argument at
+location 0 of the destination vector register group, provided that
+element 0 is active, otherwise the destination element update follows the
+current mask agnostic/undisturbed policy. If XLEN < SEW, the value is
+sign-extended to SEW bits. If XLEN > SEW, the least-significant bits
+are copied over and the high XLEN-SEW bits are ignored.
+
+The remaining active `vl`-1 elements are copied over from index _i_ in
+the source vector register group to index _i_+1 in the destination
+vector register group.
+
+The `vl` register specifies the maximum number of destination vector
+register elements updated with source values, and remaining elements
+past `vl` are handled according to the current tail policy (Section
+<<sec-agnostic>>).
+
+
+----
+ vslide1up behavior when vl > 0
+
+ i < vstart unchanged
+ 0 = i = vstart vd[i] = x[rs1] if v0.mask[i] enabled
+ max(vstart, 1) <= i < vl vd[i] = vs2[i-1] if v0.mask[i] enabled
+ vl <= i < VLMAX Follow tail policy
+----
+
+The `vslide1up` instruction requires that the destination vector
+register group does not overlap the source vector register group.
+Otherwise, the instruction encoding is reserved.
+
+[[sec-vfslide1up]]
+===== Vector Floating-Point Slide1up Instruction
+
+----
+ vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i]
+----
+
+The `vfslide1up` instruction is defined analogously to `vslide1up`,
+but sources its scalar argument from an `f` register.
+
+===== Vector Slide1down Instruction
+
+The `vslide1down` instruction copies the first `vl`-1 active element
+values from index _i_+1 in the source vector register group to index
+_i_ in the destination vector register group.
+
+The `vl` register specifies the maximum number of destination vector
+register elements written with source values, and remaining elements
+past `vl` are handled according to the current tail policy (Section
+<<sec-agnostic>>).
+
+----
+ vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1]
+----
+
+The `vslide1down` instruction places the `x` register argument at
+location `vl`-1 in the destination vector register, provided that
+element `vl-1` is active, otherwise the destination element update
+follows the current mask agnostic/undisturbed policy.
+If XLEN < SEW, the value is sign-extended to SEW bits. If
+XLEN > SEW, the least-significant bits are copied over and the high
+XLEN-SEW bits are ignored.
+
+----
+ vslide1down behavior
+
+ i < vstart unchanged
+ vstart <= i < vl-1 vd[i] = vs2[i+1] if v0.mask[i] enabled
+ vstart <= i = vl-1 vd[vl-1] = x[rs1] if v0.mask[i] enabled
+ vl <= i < VLMAX Follow tail policy
+----
+
+NOTE: The `vslide1down` instruction can be used to load values into a
+vector register without using memory and without disturbing other
+vector registers. This provides a path for debuggers to modify the
+contents of a vector register, albeit slowly, with multiple repeated
+`vslide1down` invocations.
+
+[[sec-vfslide1down]]
+===== Vector Floating-Point Slide1down Instruction
+
+----
+ vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1]
+----
+
+The `vfslide1down` instruction is defined analogously to `vslide1down`,
+but sources its scalar argument from an `f` register.
+
+==== Vector Register Gather Instructions
+
+The vector register gather instructions read elements from a first
+source vector register group at locations given by a second source
+vector register group. The index values in the second vector are
+treated as unsigned integers. The source vector can be read at any
+index < VLMAX regardless of `vl`. The maximum number of elements to write to
+the destination register is given by `vl`, and the remaining elements
+past `vl` are handled according to the current tail policy
+(Section <<sec-agnostic>>). The operation can be masked, and the mask
+undisturbed/agnostic policy is followed for inactive elements.
+
+----
+vrgather.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]];
+vrgatherei16.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]];
+----
+
+The `vrgather.vv` form uses SEW/LMUL for both the data and
+indices. The `vrgatherei16.vv` form uses SEW/LMUL for the data in
+`vs2` but EEW=16 and EMUL = (16/SEW)*LMUL for the indices in `vs1`.
+
+NOTE: When SEW=8, `vrgather.vv` can only reference vector elements
+0-255. The `vrgatherei16` form can index 64K elements, and can also
+be used to reduce the register capacity needed to hold indices when
+SEW > 16.
+
+If an element index is out of range ( `vs1[i]` {ge} VLMAX )
+then zero is returned for the element value.
+
+Vector-scalar and vector-immediate forms of the register gather are
+also provided. These read one element from the source vector at the
+given index, and write this value to the active elements
+of the destination vector register. The index value in the scalar
+register and the immediate, zero-extended to XLEN bits, are treated as
+unsigned integers. If XLEN > SEW, the index value is _not_ truncated
+to SEW bits.
+
+NOTE: These forms allow any vector element to be "splatted" to an entire vector.
+
+----
+vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]]
+vrgather.vi vd, vs2, uimm, vm # vd[i] = (uimm >= VLMAX) ? 0 : vs2[uimm]
+----
+
+For any `vrgather` instruction, the destination vector register group
+cannot overlap with the source vector register groups, otherwise the
+instruction encoding is reserved.
+
+==== Vector Compress Instruction
+
+The vector compress instruction allows elements selected by a vector
+mask register from a source vector register group to be packed into
+contiguous elements at the start of the destination vector register
+group.
+
+----
+ vcompress.vm vd, vs2, vs1 # Compress into vd elements of vs2 where vs1 is enabled
+----
+
+The vector mask register specified by `vs1` indicates which of the
+first `vl` elements of vector register group `vs2` should be extracted
+and packed into contiguous elements at the beginning of vector
+register `vd`. The remaining elements of `vd` are treated as tail
+elements according to the current tail policy (Section
+<<sec-agnostic>>).
+
+----
+ Example use of vcompress instruction
+
+ 8 7 6 5 4 3 2 1 0 Element number
+
+ 1 1 0 1 0 0 1 0 1 v0
+ 8 7 6 5 4 3 2 1 0 v1
+ 1 2 3 4 5 6 7 8 9 v2
+ vsetivli t0, 9, e8, m1, tu, ma
+ vcompress.vm v2, v1, v0
+ 1 2 3 4 8 7 5 2 0 v2
+----
+
+`vcompress` is encoded as an unmasked instruction (`vm=1`). The equivalent
+masked instruction (`vm=0`) is reserved.
+
+The destination vector register group cannot overlap the source vector
+register group or the source mask register, otherwise the instruction
+encoding is reserved.
+
+A trap on a `vcompress` instruction is always reported with a
+`vstart` of 0. Executing a `vcompress` instruction with a non-zero
+`vstart` raises an illegal instruction exception.
+
+NOTE: Although possible, `vcompress` is one of the more difficult
+instructions to restart with a non-zero `vstart`, so the assumption is
+that implementations will choose not to do that but will instead restart
+from element 0. This does mean that elements in the destination register
+after `vstart` will already have been updated.
+
+===== Synthesizing `vdecompress`
+
+There is no inverse `vdecompress` provided, as this operation can be
+readily synthesized using iota and a masked vrgather:
+
+----
+ Desired functionality of 'vdecompress'
+ 7 6 5 4 3 2 1 0 # vid
+
+ e d c b a # packed vector of 5 elements
+ 1 0 0 1 1 1 0 1 # mask vector of 8 elements
+ p q r s t u v w # destination register before vdecompress
+
+ e q r d c b v a # result of vdecompress
+----
+
+----
+ # v0 holds mask
+ # v1 holds packed data
+ # v11 holds input expanded vector and result
+ viota.m v10, v0 # Calc iota from mask in v0
+ vrgather.vv v11, v1, v10, v0.t # Expand into destination
+----
+----
+ p q r s t u v w # v11 destination register
+ e d c b a # v1 source vector
+ 1 0 0 1 1 1 0 1 # v0 mask vector
+
+ 4 4 4 3 2 1 1 0 # v10 result of viota.m
+ e q r d c b v a # v11 destination after vrgather using viota.m under mask
+----
+
+==== Whole Vector Register Move
+
+The `vmv<nr>r.v` instructions copy whole vector registers (i.e., all
+VLEN bits) and can copy whole vector register groups. The `nr` value
+in the opcode is the number of individual vector registers, NREG, to
+copy. The instructions operate as if EEW=SEW, EMUL = NREG, effective
+length `evl`= EMUL * VLEN/SEW.
+
+NOTE: These instructions are intended to aid compilers to shuffle
+vector registers without needing to know or change `vl` or `vtype`.
+
+NOTE: The usual property that no elements are written if `vstart` {ge} `vl`
+does not apply to these instructions.
+Instead, no elements are written if `vstart` {ge} `evl`.
+
+NOTE: If `vd` is equal to `vs2` the instruction is an architectural
+NOP, but is treated as a hint to implementations that rearrange data
+internally that the register group will next be accessed with an EEW
+equal to SEW.
+
+The instruction is encoded as an OPIVI instruction. The number of
+vector registers to copy is encoded in the low three bits of the
+`simm` field (`simm[2:0]`) using the same encoding as the `nf[2:0]` field for memory
+instructions (Figure <<fig-nf>>), i.e., `simm[2:0]` = NREG-1.
+
+The value of NREG must be 1, 2, 4, or 8, and values of `simm[4:0]`
+other than 0, 1, 3, and 7 are reserved.
+
+NOTE: A future extension may support other numbers of registers to be moved.
+
+NOTE: The instruction uses the same funct6 encoding as the `vsmul`
+instruction but with an immediate operand, and only the unmasked
+version (`vm=1`). This encoding is chosen as it is close to the
+related `vmerge` encoding, and it is unlikely the `vsmul` instruction
+would benefit from an immediate form.
+
+----
+ vmv<nr>r.v vd, vs2 # General form
+
+ vmv1r.v v1, v2 # Copy v1=v2
+ vmv2r.v v10, v12 # Copy v10=v12; v11=v13
+ vmv4r.v v4, v8 # Copy v4=v8; v5=v9; v6=v10; v7=v11
+ vmv8r.v v0, v8 # Copy v0=v8; v1=v9; ...; v7=v15
+----
+
+The source and destination vector register numbers must be aligned
+appropriately for the vector register group size, and encodings with
+other vector register numbers are reserved.
+
+NOTE: A future extension may relax the vector register alignment
+restrictions.
+
+=== Exception Handling
+
+On a trap during a vector instruction (caused by either a synchronous
+exception or an asynchronous interrupt), the existing `*epc` CSR is
+written with a pointer to the trapping vector instruction, while the
+`vstart` CSR contains the element index on which the trap was
+taken.
+
+NOTE: We chose to add a `vstart` CSR to allow resumption of a
+partially executed vector instruction to reduce interrupt latencies
+and to simplify forward-progress guarantees. This is similar to the
+scheme in the IBM 3090 vector facility. To ensure forward progress
+without the `vstart` CSR, implementations would have to guarantee an
+entire vector instruction can always complete atomically without
+generating a trap. This is particularly difficult to ensure in the
+presence of strided or scatter/gather operations and demand-paged
+virtual memory.
+
+==== Precise vector traps
+
+NOTE: We assume most supervisor-mode environments with demand-paging
+will require precise vector traps.
+
+Precise vector traps require that:
+
+. all instructions older than the trapping vector instruction have committed their results
+. no instructions newer than the trapping vector instruction have altered architectural state
+. any operations within the trapping vector instruction affecting result elements preceding the index in the `vstart` CSR have committed their results
+. no operations within the trapping vector instruction affecting elements at or following the `vstart` CSR have altered architectural state except if restarting and completing the affected vector instruction will nevertheless produce the correct final state.
+
+We relax the last requirement to allow elements following `vstart` to
+have been updated at the time the trap is reported, provided that
+re-executing the instruction from the given `vstart` will correctly
+overwrite those elements.
+
+In idempotent memory regions, vector store instructions may have
+updated elements in memory past the element causing a synchronous
+trap. Non-idempotent memory regions must not have been updated for
+indices equal to or greater than the element that caused a synchronous
+trap during a vector store instruction.
+
+Except where noted above, vector instructions are allowed to overwrite
+their inputs, and so in most cases, the vector instruction restart
+must be from the `vstart` element index. However, there are a number of
+cases where this overwrite is prohibited to enable execution of the
+vector instructions to be idempotent and hence restartable from an
+earlier index location.
+
+Implementations must ensure forward progress can be eventually
+guaranteed for the element or segment reported by `vstart`.
+
+==== Imprecise vector traps
+
+Imprecise vector traps are traps that are not precise. In particular,
+instructions newer than `*epc` may have committed results, and
+instructions older than `*epc` may not have completed execution.
+Imprecise traps are primarily intended to be used in situations where
+reporting an error and terminating execution is the appropriate
+response.
+
+NOTE: A profile might specify that interrupts are precise while other
+traps are imprecise. We assume many embedded implementations will
+generate only imprecise traps for vector instructions on fatal errors,
+as they will not require resumable traps.
+
+Imprecise traps shall report the faulting element in `vstart` for
+traps caused by synchronous vector exceptions.
+
+There is no support for imprecise traps in the current standard extensions.
+
+==== Selectable precise/imprecise traps
+
+Some profiles may choose to provide a privileged mode bit to select
+between precise and imprecise vector traps. Imprecise mode would run
+at high performance but possibly make it difficult to discern error
+causes, while precise mode would run more slowly, but support
+debugging of errors albeit with a possibility of not experiencing the
+same errors as in imprecise mode.
+
+This mechanism is not defined in the current standard extensions.
+
+==== Swappable traps
+
+Another trap mode can support swappable state in the vector unit,
+where on a trap, special instructions can save and restore the vector
+unit microarchitectural state, to allow execution to continue
+correctly around imprecise traps.
+
+This mechanism is not defined in the current standard extensions.
+
+NOTE: A future extension might define a standard way of saving and
+restoring opaque microarchitectural state from a vector unit
+implementation to support context switching with imprecise traps.
+
+[[sec-vector-extensions]]
+=== Standard Vector Extensions
+
+This section describes the standard vector extensions.
+A set of smaller extensions intended for embedded
+use are named with a "Zve" prefix, while a larger vector extension
+designed for application processors is named as a single-letter V
+extension. A set of vector length extension names with prefix "Zvl"
+are also provided.
+
+The initial vector extensions are designed to act as a base for
+additional vector extensions in various domains, including
+cryptography and machine learning.
+
+==== Zvl*: Minimum Vector Length Standard Extensions
+
+All standard vector extensions have a minimum required VLEN as
+described below. A set of vector length extensions are provided to
+increase the minimum vector length of a vector extension.
+
+NOTE: The vector length extensions can be used to either specify
+additional software or architecture profile requirements, or to
+advertise hardware capabilities.
+
+.Vector length extensions
+[cols="1,1"]
+[%autowidth,float="center",align="center",options="header"]
+|===
+| Extension | Minimum VLEN
+
+| Zvl32b | 32
+| Zvl64b | 64
+| Zvl128b | 128
+| Zvl256b | 256
+| Zvl512b | 512
+| Zvl1024b | 1024
+|===
+
+NOTE: Longer vector length extensions should follow the same pattern.
+
+NOTE: Every vector length extension effectively includes all shorter
+vector length extensions.
+
+NOTE: The syntax for extension names is being revised, and these names
+are subject to change. The trailing "b" will be required to
+disambiguate numeric fields from version numbers.
+
+NOTE: Explicit use of the Zvl32b extension string is not required for
+any standard vector extension as they all effectively mandate at least
+this minimum, but the string can be useful when stating hardware
+capabilities.
+
+==== Zve*: Vector Extensions for Embedded Processors
+
+The following five standard extensions are defined to provide varying
+degrees of vector support and are intended for use with embedded
+processors. Any of these extensions can be added to base ISAs with
+XLEN=32 or XLEN=64. The table lists the minimum VLEN and supported
+EEWs for each extension as well as what floating-point types are
+supported.
+
+.Embedded vector extensions
+[cols="1,1,2,1,1"]
+[%autowidth,float="center",align="center",options="header"]
+|===
+| Extension | Minimum VLEN | Supported EEW | FP32 | FP64
+
+| Zve32x | 32 | 8, 16, 32 | N | N
+| Zve32f | 32 | 8, 16, 32 | Y | N
+| Zve64x | 64 | 8, 16, 32, 64 | N | N
+| Zve64f | 64 | 8, 16, 32, 64 | Y | N
+| Zve64d | 64 | 8, 16, 32, 64 | Y | Y
+|===
+
+The Zve32f and Zve64x extensions depend on the Zve32x extension.
+The Zve64f extension depends on the Zve32f and Zve64x extensions.
+The Zve64d extension depends on the Zve64f extension.
+
+All Zve* extensions have precise traps.
+
+NOTE: There is currently no standard support for handling imprecise
+traps, so standard extensions have to provide precise traps.
+
+All Zve* extensions provide support for EEW of 8, 16, and 32, and
+Zve64* extensions also support EEW of 64.
+
+All Zve* extensions support the vector configuration instructions
+(Section <<sec-vector-config>>).
+
+All Zve* extensions support all vector load and store instructions
+(Section <<sec-vector-memory>>), except Zve64* extensions do not
+support EEW=64 for index values when XLEN=32.
+
+All Zve* extensions support all vector integer instructions (Section
+<<sec-vector-integer>>), except that the `vmulh` integer multiply
+variants that return the high word of the product (`vmulh.vv`,
+`vmulh.vx`, `vmulhu.vv`, `vmulhu.vx`, `vmulhsu.vv`, `vmulhsu.vx`) are
+not included for EEW=64 in Zve64*.
+
+NOTE: Producing the high-word of a product can take substantial
+additional gates for large EEW.
+
+All Zve* extensions support all vector fixed-point arithmetic
+instructions (<<sec-vector-fixed-point>>), except that `vsmul.vv` and
+`vsmul.vx` are not included for EEW=64 in Zve64*.
+
+NOTE: As with `vmulh`, `vsmul` requires a large amount of additional
+logic, and 64-bit fixed-point multiplies are relatively rare.
+
+All Zve* extensions support all vector integer single-width and
+widening reduction operations (Sections <<sec-vector-integer-reduce>>,
+<<sec-vector-integer-reduce-widen>>).
+
+All Zve* extensions support all vector mask instructions (Section
+<<sec-vector-mask>>).
+
+All Zve* extensions support all vector permutation instructions
+(Section <<sec-vector-permute>>), except that Zve32x and Zve64x
+do not include those with floating-point operands, and Zve64f does not include those
+with EEW=64 floating-point operands.
+
+The Zve32x extension depends on the Zicsr extension.
+The Zve32f and Zve64f extensions depend upon the F extension,
+and implement all
+vector floating-point instructions (Section <<sec-vector-float>>) for
+floating-point operands with EEW=32. Vector single-width floating-point reduction
+operations (<<sec-vector-float-reduce>>) for EEW=32 are supported.
+
+The Zve64d extension depends upon the D extension,
+and implements all vector
+floating-point instructions (Section <<sec-vector-float>>) for
+floating-point operands with EEW=32 or EEW=64 (including widening
+instructions and conversions between FP32 and FP64). Vector
+single-width floating-point reductions (<<sec-vector-float-reduce>>)
+for EEW=32 and EEW=64 are supported as well as widening reductions
+from FP32 to FP64.
+
+==== V: Vector Extension for Application Processors
+
+The single-letter V extension is intended for use in application
+processor profiles.
+
+The `misa.v` bit is set for implementations providing `misa` and
+supporting V.
+
+The V vector extension has precise traps.
+
+The V vector extension depends upon the Zvl128b and Zve64d extensions.
+
+NOTE: The value of 128 was chosen as a compromise for application
+processors. Providing a larger VLEN allows stripmining code to be
+elided in some cases for short vectors, but also increases the size of
+the minimum implementation. Note that larger LMUL can be used to
+avoid stripmining for longer known-size application vectors at the
+cost of having fewer available vector register groups. For example, an
+LMUL of 8 allows vectors of up to sixteen 64-bit elements to be
+processed without stripmining using four vector register groups.
+
+The V extension supports EEW of 8, 16, 32, and 64.
+
+The V extension supports the vector configuration instructions
+(Section <<sec-vector-config>>).
+
+The V extension supports all vector load and store instructions
+(Section <<sec-vector-memory>>), except the V extension does not
+support EEW=64 for index values when XLEN=32.
+
+The V extension supports all vector integer instructions (Section
+<<sec-vector-integer>>).
+
+The V extension supports all vector fixed-point arithmetic
+instructions (<<sec-vector-fixed-point>>).
+
+The V extension supports all vector integer single-width and
+widening reduction operations (Sections <<sec-vector-integer-reduce>>,
+<<sec-vector-integer-reduce-widen>>).
+
+The V extension supports all vector mask instructions (Section
+<<sec-vector-mask>>).
+
+The V extension supports all vector permutation instructions (Section
+<<sec-vector-permute>>).
+
+The V extension depends upon the F and D
+extensions, and implements all vector floating-point instructions
+(Section <<sec-vector-float>>) for floating-point operands with EEW=32
+or EEW=64 (including widening instructions and conversions between
+FP32 and FP64). Vector single-width floating-point reductions
+(<<sec-vector-float-reduce>>) for EEW=32 and EEW=64 are supported as
+well as widening reductions from FP32 to FP64.
+
+[NOTE]
+====
+As is the case with other RISC-V extensions, it is valid to
+include overlapping extensions in the same ISA string. For example,
+RV64GCV and RV64GCV_Zve64f are both valid and equivalent ISA strings,
+as is RV64GCV_Zve64f_Zve32x_Zvl128b.
+====
+
+==== Zvfhmin: Vector Extension for Minimal Half-Precision Floating-Point
+
+The Zvfhmin extension provides minimal support for vectors of IEEE 754-2008
+binary16 values, adding conversions to and from binary32.
+When the Zvfhmin extension is implemented, the `vfwcvt.f.f.v` and
+`vfncvt.f.f.w` instructions become defined when SEW=16.
+The EEW=16 floating-point operands of these instructions use the binary16
+format.
+
+The Zvfhmin extension depends on the Zve32f extension.
+
+==== Zvfh: Vector Extension for Half-Precision Floating-Point
+
+The Zvfh extension provides support for vectors of IEEE 754-2008
+binary16 values.
+When the Zvfh extension is implemented, all instructions in Sections
+<<sec-vector-float>>, <<sec-vector-float-reduce>>,
+<<sec-vector-float-reduce-widen>>, <<sec-vector-float-move>>,
+<<sec-vfslide1up>>, and <<sec-vfslide1down>>
+become defined when SEW=16.
+The EEW=16 floating-point operands of these instructions use the binary16
+format.
+
+Additionally, conversions between 8-bit integers and binary16 values are
+provided. The floating-point-to-integer narrowing conversions
+(`vfncvt[.rtz].x[u].f.w`) and integer-to-floating-point
+widening conversions (`vfwcvt.f.x[u].v`) become defined when SEW=8.
+
+The Zvfh extension depends on the Zve32f and Zfhmin extensions.
+
+NOTE: Requiring basic scalar half-precision support makes Zvfh's
+vector-scalar instructions substantially more useful.
+We considered requiring more complete scalar half-precision support, but we
+reasoned that, for many half-precision vector workloads, performing the scalar
+computation in single-precision will suffice.
+
+=== Vector Instruction Listing
+
+include::images/wavedrom/v-inst-table.adoc[]
+
diff --git a/src/vector-examples.adoc b/src/vector-examples.adoc
new file mode 100644
index 0000000..9e54acd
--- /dev/null
+++ b/src/vector-examples.adoc
@@ -0,0 +1,125 @@
+[appendix]
+== Vector Assembly Code Examples
+
+The following are provided as non-normative text to help explain the vector ISA.
+
+=== Vector-vector add example
+
+----
+include::example/vvaddint32.s[lines=4..-1]
+----
+
+=== Example with mixed-width mask and compute.
+
+----
+# Code using one width for predicate and different width for masked
+# compute.
+# int8_t a[]; int32_t b[], c[];
+# for (i=0; i<n; i++) { b[i] = (a[i] < 5) ? c[i] : 1; }
+#
+# Mixed-width code that keeps SEW/LMUL=8
+ loop:
+ vsetvli a4, a0, e8, m1, ta, ma # Byte vector for predicate calc
+ vle8.v v1, (a1) # Load a[i]
+ add a1, a1, a4 # Bump pointer.
+ vmslt.vi v0, v1, 5 # a[i] < 5?
+
+ vsetvli x0, a0, e32, m4, ta, mu # Vector of 32-bit values.
+ sub a0, a0, a4 # Decrement count
+ vmv.v.i v4, 1 # Splat immediate to destination
+ vle32.v v4, (a3), v0.t # Load requested elements of C, others undisturbed
+ sll t1, a4, 2
+ add a3, a3, t1 # Bump pointer.
+ vse32.v v4, (a2) # Store b[i].
+ add a2, a2, t1 # Bump pointer.
+ bnez a0, loop # Any more?
+----
+
+=== Memcpy example
+
+----
+include::example/memcpy.s[lines=4..-1]
+----
+
+=== Conditional example
+
+----
+# (int16) z[i] = ((int8) x[i] < 5) ? (int16) a[i] : (int16) b[i];
+#
+
+loop:
+ vsetvli t0, a0, e8, m1, ta, ma # Use 8b elements.
+ vle8.v v0, (a1) # Get x[i]
+ sub a0, a0, t0 # Decrement element count
+ add a1, a1, t0 # x[i] Bump pointer
+ vmslt.vi v0, v0, 5 # Set mask in v0
+ vsetvli x0, x0, e16, m2, ta, mu # Use 16b elements.
+ slli t0, t0, 1 # Multiply by 2 bytes
+ vle16.v v2, (a2), v0.t # z[i] = a[i] case
+ vmnot.m v0, v0 # Invert v0
+ add a2, a2, t0 # a[i] bump pointer
+ vle16.v v2, (a3), v0.t # z[i] = b[i] case
+ add a3, a3, t0 # b[i] bump pointer
+ vse16.v v2, (a4) # Store z
+ add a4, a4, t0 # z[i] bump pointer
+ bnez a0, loop
+----
+=== SAXPY example
+
+----
+include::example/saxpy.s[lines=4..-1]
+----
+
+=== SGEMM example
+
+----
+include::example/sgemm.S[lines=4..-1]
+----
+
+=== Division approximation example
+
+----
+# v1 = v1 / v2 to almost 23 bits of precision.
+
+vfrec7.v v3, v2 # Estimate 1/v2
+ li t0, 0x40000000
+vmv.v.x v4, t0 # Splat 2.0
+vfnmsac.vv v4, v2, v3 # 2.0 - v2 * est(1/v2)
+vfmul.vv v3, v3, v4 # Better estimate of 1/v2
+vmv.v.x v4, t0 # Splat 2.0
+vfnmsac.vv v4, v2, v3 # 2.0 - v2 * est(1/v2)
+vfmul.vv v3, v3, v4 # Better estimate of 1/v2
+vfmul.vv v1, v1, v3 # Estimate of v1/v2
+----
+
+=== Square root approximation example
+
+----
+# v1 = sqrt(v1) to almost 23 bits of precision.
+
+ fmv.w.x ft0, x0 # Mask off zero inputs
+vmfne.vf v0, v1, ft0 # to avoid div by zero
+vfrsqrt7.v v2, v1, v0.t # Estimate 1/sqrt(x)
+vmfne.vf v0, v2, ft0, v0.t # Additionally mask off +inf inputs
+ li t0, 0x40400000
+vmv.v.x v4, t0 # Splat 3.0
+vfmul.vv v3, v1, v2, v0.t # x * est
+vfnmsub.vv v3, v2, v4, v0.t # - x * est * est + 3
+vfmul.vv v3, v3, v2, v0.t # est * (-x * est * est + 3)
+ li t0, 0x3f000000
+ fmv.w.x ft0, t0 # 0.5
+vfmul.vf v2, v3, ft0, v0.t # Estimate to 14 bits
+vfmul.vv v3, v1, v2, v0.t # x * est
+vfnmsub.vv v3, v2, v4, v0.t # - x * est * est + 3
+vfmul.vv v3, v3, v2, v0.t # est * (-x * est * est + 3)
+vfmul.vf v2, v3, ft0, v0.t # Estimate to 23 bits
+vfmul.vv v1, v2, v1, v0.t # x * 1/sqrt(x)
+----
+
+=== C standard library strcmp example
+
+----
+include::example/strcmp.s[lines=4..-1]
+----
+
+include::fraclmul.adoc[]
diff --git a/src/zam-st-ext.adoc b/src/zam-st-ext.adoc
deleted file mode 100644
index b6fa649..0000000
--- a/src/zam-st-ext.adoc
+++ /dev/null
@@ -1,55 +0,0 @@
-[[zam]]
-== "Zam" Standard Extension for Misaligned Atomics, v0.1
-
-This chapter defines the "Zam" extension, which extends the "A"
-extension by standardizing support for misaligned atomic memory
-operations (AMOs). On platforms implementing "Zam", misaligned AMOs
-need only execute atomically with respect to other accesses (including
-non-atomic loads and stores) to the same address and of the same size.
-More precisely, execution environments implementing "Zam" are subject
-to the following axiom:
-
-[[misaligned]]
-=== Atomicity Axiom for misaligned atomics
-
-If latexmath:[$r$] and latexmath:[$w$] are paired misaligned load and
-store instructions from a hart latexmath:[$h$] with the same address and
-of the same size, then there can be no store instruction latexmath:[$s$]
-from a hart other than latexmath:[$h$] with the same address and of the
-same size as latexmath:[$r$] and latexmath:[$w$] such that a store
-operation generated by latexmath:[$s$] lies in between memory operations
-generated by latexmath:[$r$] and latexmath:[$w$] in the global memory
-order. Furthermore, there can be no load instruction latexmath:[$l$]
-from a hart other than latexmath:[$h$] with the same address and of the
-same size as latexmath:[$r$] and latexmath:[$w$] such that a load
-operation generated by latexmath:[$l$] lies between two memory
-operations generated by latexmath:[$r$] or by latexmath:[$w$] in the
-global memory order.
-
-This restricted form of atomicity is intended to balance the needs of
-applications which require support for misaligned atomics and the
-ability of the implementation to actually provide the necessary degree
-of atomicity.
-
-Aligned instructions under "Zam" continue to behave as they normally
-do under RVWMO.
-
-[NOTE]
-====
-_The intention of "Zam" is that it can be implemented in one of two
-ways:_
-
-. _On hardware that natively supports atomic misaligned accesses to the
-address and size in question (e.g., for misaligned accesses within a
-single cache line): by simply following the same rules that would be
-applied for aligned AMOs._
-. _On hardware that does not natively support misaligned accesses to the
-address and size in question: by trapping on all instructions (including
-loads) with that address and size and executing them (via any number of
-memory operations) inside a mutex that is a function of the given memory
-address and access size. AMOs may be emulated by splitting them into
-separate load and store operations, but all preserved program order
-rules (e.g., incoming and outgoing syntactic dependencies) must behave
-as if the AMO is still a single memory operation._
-====
-
diff --git a/src/zawrs.adoc b/src/zawrs.adoc
new file mode 100644
index 0000000..eb94036
--- /dev/null
+++ b/src/zawrs.adoc
@@ -0,0 +1,105 @@
+== "Zawrs" Standard extension for Wait-on-Reservation-Set instructions, Version 1.01
+
+The Zawrs extension defines a pair of instructions to be used in polling loops
+that allows a core to enter a low-power state and wait on a store to a memory
+location. Waiting for a memory location to be updated is a common pattern in
+many use cases such as:
+
+. Contenders for a lock waiting for the lock variable to be updated.
+
+. Consumers waiting on the tail of an empty queue for the producer to queue
+ work/data. The producer may be code executing on a RISC-V hart, an accelerator
+ device, or an external I/O agent.
+
+. Code waiting on a flag to be set in memory indicative of an event occurring.
+ For example, software on a RISC-V hart may wait on a "done" flag to be set in
+ memory by an accelerator device indicating completion of a job previously
+ submitted to the device.
+
+Such use cases involve polling on memory locations, and such busy loops can be a
+wasteful expenditure of energy. To mitigate the wasteful looping in such usages,
+a `WRS.NTO` (WRS-with-no-timeout) instruction is provided. Instead of polling
+for a store to a specific memory location, software registers a reservation set
+that includes all the bytes of the memory location using the `LR` instruction.
+Then a subsequent `WRS.NTO` instruction would cause the hart to temporarily
+stall execution in a low-power state until a store occurs to the reservation set
+or an interrupt is observed.
+
+Sometimes the program waiting on a memory update may also need to carry out a
+task at a future time or otherwise place an upper bound on the wait. To support
+such use cases a second instruction `WRS.STO` (WRS-with-short-timeout) is
+provided that works like `WRS.NTO` but bounds the stall duration to an
+implementation-defined short timeout such that the stall is terminated on the
+timeout if no other conditions have occurred to terminate the stall. The
+program using this instruction may then determine if its deadline has been
+reached.
+
+[NOTE]
+====
+The instructions in the Zawrs extension are only useful in conjunction with the
+LR instruction, which is provided by the A extension, and which we also expect
+to be provided by a narrower Zalrsc extension in the future.
+====
+[[Zawrs]]
+=== Wait-on-Reservation-Set Instructions
+
+The `WRS.NTO` and `WRS.STO` instructions cause the hart to temporarily stall
+execution in a low-power state as long as the reservation set is valid and no
+pending interrupts, even if disabled, are observed. For `WRS.STO` the stall
+duration is bounded by an implementation-defined short timeout. These
+instructions are available in all privilege modes. These instructions are not
+supported in a constrained `LR`/`SC` loop.
+
+[wavedrom, ,svg]
+....
+{reg: [
+ {bits: 7, name: 'opcode', attr: ['SYSTEM(0x73)'] },
+ {bits: 5, name: 'rd', attr: ['0'] },
+ {bits: 3, name: 'funct3', attr: ['0'] },
+ {bits: 5, name: 'rs1', attr: ['0'] },
+ {bits: 12, name: 'funct12', attr:['WRS.NTO(0x0d)', 'WRS.STO(0x1d)'] },
+], config:{lanes: 1, hspace:1024}}
+....
+
+<<<
+
+Hart execution may be stalled while the following conditions are all satisfied:
+[loweralpha]
+ . The reservation set is valid
+ . If `WRS.STO`, a "short" duration since start of stall has not elapsed
+ . No pending interrupt is observed (see the rules below)
+
+While stalled, an implementation is permitted to occasionally terminate the
+stall and complete execution for any reason.
+
+`WRS.NTO` and `WRS.STO` instructions follow the rules of the `WFI` instruction
+for resuming execution on a pending interrupt.
+
+When the `TW` (Timeout Wait) bit in `mstatus` is set and `WRS.NTO` is executed
+in any privilege mode other than M mode, and it does not complete within an
+implementation-specific bounded time limit, the `WRS.NTO` instruction will cause
+an illegal instruction exception.
+
+When executing in VS or VU mode, if the `VTW` bit is set in `hstatus`, the
+`TW` bit in `mstatus` is clear, and the `WRS.NTO` does not complete within an
+implementation-specific bounded time limit, the `WRS.NTO` instruction will cause
+a virtual instruction exception.
+
+[NOTE]
+====
+Since the `WRS.STO` and `WRS.NTO` instructions can complete execution for
+reasons other than stores to the reservation set, software will likely need
+a means of looping until the required stores have occurred.
+
+The duration of a `WRS.STO` instruction's timeout may vary significantly within
+and among implementations. In typical implementations this duration should be
+roughly in the range of 10 to 100 times an on-chip cache miss latency or a
+cacheless access to main memory.
+
+`WRS.NTO`, unlike `WFI`, is not specified to cause an illegal instruction
+exception if executed in U-mode when the governing `TW` bit is 0. `WFI` is
+typically not expected to be used in U-mode and on many systems may promptly
+cause an illegal instruction exception if used at U-mode. Unlike `WFI`,
+`WRS.NTO` is expected to be used by software in U-mode when waiting on
+memory but without a deadline for that wait.
+==== \ No newline at end of file
diff --git a/src/zc.adoc b/src/zc.adoc
new file mode 100644
index 0000000..2f2ef37
--- /dev/null
+++ b/src/zc.adoc
@@ -0,0 +1,2611 @@
+[#Zc]
+== "Zc*" Standard Extension for Code Size Reduction
+
+=== Zc* Overview
+
+Zc* is a group of extensions that define subsets of the existing C extension (Zca, Zcd, Zcf) and new extensions which only contain 16-bit encodings.
+
+Zcm* all reuse the encodings for _c.fld_, _c.fsd_, _c.fldsp_, _c.fsdsp_.
+
+.Zc* extension overview
+[width="100%",options=header,cols="3,1,1,1,1,1,1"]
+|====================================================================================
+|Instruction |Zca |Zcf |Zcd |Zcb |Zcmp |Zcmt
+7+|*The Zca extension is added as a way to refer to instructions in the C extension that do not include the floating-point loads and stores*
+|C excl. c.f* |yes | | | | |
+7+|*The Zcf extension is added as a way to refer to compressed single-precision floating-point load/stores*
+|c.flw | |rv32 | | | |
+|c.flwsp | |rv32 | | | |
+|c.fsw | |rv32 | | | |
+|c.fswsp | |rv32 | | | |
+7+|*The Zcd extension is added as a way to refer to compressed double-precision floating-point load/stores*
+|c.fld | | |yes | | |
+|c.fldsp | | |yes | | |
+|c.fsd | | |yes | | |
+|c.fsdsp | | |yes | | |
+7+|*Simple operations for use on all architectures*
+|c.lbu | | | |yes | |
+|c.lh | | | |yes | |
+|c.lhu | | | |yes | |
+|c.sb | | | |yes | |
+|c.sh | | | |yes | |
+|c.zext.b | | | |yes | |
+|c.sext.b | | | |yes | |
+|c.zext.h | | | |yes | |
+|c.sext.h | | | |yes | |
+|c.zext.w | | | |yes | |
+|c.mul | | | |yes | |
+|c.not | | | |yes | |
+7+|*PUSH/POP and double move which overlap with _c.fsdsp_. Complex operations intended for embedded CPUs*
+|cm.push | | | | |yes |
+|cm.pop | | | | |yes |
+|cm.popret | | | | |yes |
+|cm.popretz | | | | |yes |
+|cm.mva01s | | | | |yes |
+|cm.mvsa01 | | | | |yes |
+7+|*Table jump which overlaps with _c.fsdsp_. Complex operations intended for embedded CPUs*
+|cm.jt | | | | | |yes
+|cm.jalt | | | | | |yes
+|====================================================================================
+
+[#C]
+=== C
+
+The C extension is the superset of the following extensions:
+
+* Zca
+* Zcf if F is specified (RV32 only)
+* Zcd if D is specified
+
+As C defines the same instructions as Zca, Zcf and Zcd, the rule is that:
+
+* C always implies Zca
+* C+F implies Zcf (RV32 only)
+* C+D implies Zcd
+
+[reftext="Zce"]
+=== Zce
+
+The Zce extension is intended to be used for microcontrollers, and includes all relevant Zc extensions.
+
+* Specifying Zce on RV32 without F includes Zca, Zcb, Zcmp, Zcmt
+* Specifying Zce on RV32 with F includes Zca, Zcb, Zcmp, Zcmt _and_ Zcf
+* Specifying Zce on RV64 always includes Zca, Zcb, Zcmp, Zcmt
+** Zcf doesn't exist for RV64
+
+Therefore common ISA strings can be updated as follows to include the relevant Zc extensions, for example:
+
+* RV32IMC becomes RV32IM_Zce
+* RV32IMCF becomes RV32IMF_Zce
+
+[#misaC]
+=== MISA.C
+
+MISA.C is set if the following extensions are selected:
+
+* Zca and not F
+* Zca, Zcf and F is specified (RV32 only)
+* Zca, Zcf and Zcd if D is specified (RV32 only)
+** this configuration excludes Zcmp, Zcmt
+* Zca, Zcd if D is specified (RV64 only)
+** this configuration excludes Zcmp, Zcmt
+
+[reftext="Zca"]
+=== Zca
+
+The Zca extension is added as a way to refer to instructions in the C extension that do not include the floating-point loads and stores.
+
+Therefore it _excludes_ all 16-bit floating point loads and stores: _c.flw_, _c.flwsp_, _c.fsw_, _c.fswsp_, _c.fld_, _c.fldsp_, _c.fsd_, _c.fsdsp_.
+
+[NOTE]
+====
+The C extension only includes F/D instructions when D and F are also specified.
+====
+
+[reftext="Zcf"]
+=== Zcf (RV32 only)
+
+Zcf is the existing set of compressed single precision floating point loads and stores: _c.flw_, _c.flwsp_, _c.fsw_, _c.fswsp_.
+
+Zcf is only relevant to RV32; it cannot be specified for RV64.
+
+The Zcf extension depends on the <<Zca>> and F extensions.
+
+[reftext="Zcd"]
+=== Zcd
+
+Zcd is the existing set of compressed double precision floating point loads and stores: _c.fld_, _c.fldsp_, _c.fsd_, _c.fsdsp_.
+
+The Zcd extension depends on the <<Zca>> and D extensions.
+
+[reftext="Zcb"]
+=== Zcb
+
+Zcb has simple code-size saving instructions which are easy to implement on all CPUs.
+
+All encodings are currently reserved for all architectures, and have no conflicts with any existing extensions.
+
+NOTE: Zcb can be implemented on _any_ CPU as the instructions are 16-bit versions of existing 32-bit instructions from the application class profile.
+
+The Zcb extension depends on the <<Zca>> extension.
+
+As shown on the individual instruction pages, many of the instructions in Zcb depend upon another extension being implemented. For example, _c.mul_ is only implemented if M or Zmmul is implemented, and _c.sext.b_ is only implemented if Zbb is implemented.
+
+The _c.mul_ encoding uses the CA register format along with other instructions such as _c.sub_, _c.xor_ etc.
+
+[NOTE]
+====
+_c.sext.w_ is a pseudo-instruction for _c.addiw rd, 0_ (RV64)
+====
+
+[%header,cols="^1,^1,4,8"]
+|===
+|RV32
+|RV64
+|Mnemonic
+|Instruction
+
+|yes
+|yes
+|c.lbu _rd'_, uimm(_rs1'_)
+|<<#insns-c_lbu>>
+
+|yes
+|yes
+|c.lhu _rd'_, uimm(_rs1'_)
+|<<#insns-c_lhu>>
+
+|yes
+|yes
+|c.lh _rd'_, uimm(_rs1'_)
+|<<#insns-c_lh>>
+
+|yes
+|yes
+|c.sb _rs2'_, uimm(_rs1'_)
+|<<#insns-c_sb>>
+
+|yes
+|yes
+|c.sh _rs2'_, uimm(_rs1'_)
+|<<#insns-c_sh>>
+
+|yes
+|yes
+|c.zext.b _rsd'_
+|<<#insns-c_zext_b>>
+
+|yes
+|yes
+|c.sext.b _rsd'_
+|<<#insns-c_sext_b>>
+
+|yes
+|yes
+|c.zext.h _rsd'_
+|<<#insns-c_zext_h>>
+
+|yes
+|yes
+|c.sext.h _rsd'_
+|<<#insns-c_sext_h>>
+
+|
+|yes
+|c.zext.w _rsd'_
+|<<#insns-c_zext_w>>
+
+|yes
+|yes
+|c.not _rsd'_
+|<<#insns-c_not>>
+
+|yes
+|yes
+|c.mul _rsd'_, _rs2'_
+|<<#insns-c_mul>>
+
+|===
+
+<<<
+
+[#Zcmp]
+=== Zcmp
+
+The Zcmp extension is a set of instructions which may be executed as a series of existing 32-bit RISC-V instructions.
+
+This extension reuses some encodings from _c.fsdsp_. Therefore it is _incompatible_ with <<Zcd>>,
+ which is included when C and D extensions are both present.
+
+NOTE: Zcmp is primarily targeted at embedded class CPUs due to implementation complexity. Additionally, it is not compatible with architecture class profiles.
+
+The Zcmp extension depends on the <<Zca>> extension.
+
+The PUSH/POP assembly syntax uses several variables, the meanings of which are:
+
+* _reg_list_ is a list containing 1 to 13 registers (ra and 0 to 12 s registers)
+** valid values: {ra}, {ra, s0}, {ra, s0-s1}, {ra, s0-s2}, ..., {ra, s0-s8}, {ra, s0-s9}, {ra, s0-s11}
+** note that {ra, s0-s10} is _not_ valid, giving 12 lists not 13 for better encoding
+* _stack_adj_ is the total size of the stack frame.
+** valid values vary with register list length and the specific encoding, see the instruction pages for details.
+
+[%header,cols="^1,^1,4,8"]
+|===
+|RV32
+|RV64
+|Mnemonic
+|Instruction
+
+|yes
+|yes
+|cm.push _{reg_list}, -stack_adj_
+|<<#insns-cm_push>>
+
+|yes
+|yes
+|cm.pop _{reg_list}, stack_adj_
+|<<#insns-cm_pop>>
+
+|yes
+|yes
+|cm.popret _{reg_list}, stack_adj_
+|<<#insns-cm_popret>>
+
+|yes
+|yes
+|cm.popretz _{reg_list}, stack_adj_
+|<<#insns-cm_popretz>>
+
+|yes
+|yes
+|cm.mva01s _r1s', r2s'_
+|<<#insns-cm_mva01s>>
+
+|yes
+|yes
+|cm.mvsa01 _r1s', r2s'_
+|<<#insns-cm_mvsa01>>
+
+|===
+
+<<<
+
+[#Zcmt]
+=== Zcmt
+
+Zcmt adds the table jump instructions and also adds the JVT CSR. The JVT CSR requires a
+state enable if Smstateen is implemented. See <<csrs-jvt>> for details.
+
+This extension reuses some encodings from _c.fsdsp_. Therefore it is _incompatible_ with <<Zcd>>,
+ which is included when C and D extensions are both present.
+
+NOTE: Zcmt is primarily targeted at embedded class CPUs due to implementation complexity. Additionally, it is not compatible with RVA profiles.
+
+The Zcmt extension depends on the <<Zca>> and Zicsr extensions.
+
+[%header,cols="^1,^1,4,8"]
+|===
+|RV32
+|RV64
+|Mnemonic
+|Instruction
+
+|yes
+|yes
+|cm.jt _index_
+|<<#insns-cm_jt>>
+
+|yes
+|yes
+|cm.jalt _index_
+|<<#insns-cm_jalt>>
+
+|===
+
+[#Zc_formats]
+=== Zc instruction formats
+
+Several instructions in this specification use the following new instruction formats.
+
+[%header,cols="2,3,2,1,1,1,1,1,1,1,1,1,1"]
+|=====================================================================
+| Format | instructions | 15:10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0
+| CLB | c.lbu | funct6 3+| rs1' 2+| uimm 3+| rd' 2+| op
+| CSB | c.sb | funct6 3+| rs1' 2+| uimm 3+| rs2' 2+| op
+| CLH | c.lhu, c.lh | funct6 3+| rs1' | funct1 | uimm 3+| rd' 2+| op
+| CSH | c.sh | funct6 3+| rs1' | funct1 | uimm 3+| rs2' 2+| op
+| CU | c.[sz]ext.*, c.not | funct6 3+| rd'/rs1' 5+| funct5 2+| op
+| CMMV | cm.mvsa01 cm.mva01s| funct6 3+| r1s' 2+| funct2 3+| r2s' 2+| op
+| CMJT | cm.jt cm.jalt | funct6 8+| index 2+| op
+| CMPP | cm.push*, cm.pop* | funct6 2+| funct2 4+| urlist 2+| spimm 2+| op
+|=====================================================================
+
+[NOTE]
+====
+c.mul uses the existing CA format.
+====
+
+<<<
+
+[#Zcb_instructions]
+=== Zcb instructions
+
+[#insns-c_lbu,reftext="Load unsigned byte, 16-bit encoding"]
+==== c.lbu
+
+Synopsis:
+
+Load unsigned byte, 16-bit encoding
+
+Mnemonic:
+
+c.lbu _rd'_, _uimm_(_rs1'_)
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x0, attr: ['C0'] },
+ { bits: 3, name: 'rd\'' },
+ { bits: 2, name: 'uimm[0|1]' },
+ { bits: 3, name: 'rs1\'' },
+ { bits: 3, name: 0x0 },
+ { bits: 3, name: 0x4, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+The immediate offset is formed as follows:
+
+[source,sail]
+--
+ uimm[31:2] = 0;
+ uimm[1] = encoding[5];
+ uimm[0] = encoding[6];
+--
+
+Description:
+
+This instruction loads a byte from the memory address formed by adding _rs1'_ to the zero extended immediate _uimm_. The resulting byte is zero extended to XLEN bits and is written to _rd'_.
+
+[NOTE]
+====
+_rd'_ and _rs1'_ are from the standard 8-register set x8-x15.
+====
+
+Prerequisites:
+
+None
+//32-bit equivalent:
+//<<insns-lbu>>
+
+Operation:
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+X(rdc) = EXTZ(mem[X(rs1c)+EXTZ(uimm)][7..0]);
+----
+
+<<<
+[#insns-c_lhu,reftext="Load unsigned halfword, 16-bit encoding"]
+==== c.lhu
+
+Synopsis:
+
+Load unsigned halfword, 16-bit encoding
+
+Mnemonic:
+
+c.lhu _rd'_, _uimm_(_rs1'_)
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x0, attr: ['C0'] },
+ { bits: 3, name: 'rd\'' },
+ { bits: 1, name: 'uimm[1]' },
+ { bits: 1, name: 0x0 },
+ { bits: 3, name: 'rs1\'' },
+ { bits: 3, name: 0x1 },
+ { bits: 3, name: 0x4, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+The immediate offset is formed as follows:
+
+[source,sail]
+----
+ uimm[31:2] = 0;
+ uimm[1] = encoding[5];
+ uimm[0] = 0;
+----
+
+Description:
+
+This instruction loads a halfword from the memory address formed by adding _rs1'_ to the zero extended immediate _uimm_. The resulting halfword is zero extended to XLEN bits and is written to _rd'_.
+
+[NOTE]
+====
+_rd'_ and _rs1'_ are from the standard 8-register set x8-x15.
+====
+
+Prerequisites:
+
+None
+//32-bit equivalent:
+//
+//<<insns-lhu>>
+
+Operation:
+
+[source,sail]
+--
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+X(rdc) = EXTZ(load_mem[X(rs1c)+EXTZ(uimm)][15..0]);
+--
+
+<<<
+[#insns-c_lh,reftext="Load signed halfword, 16-bit encoding"]
+==== c.lh
+
+Synopsis:
+
+Load signed halfword, 16-bit encoding
+
+Mnemonic:
+
+c.lh _rd'_, _uimm_(_rs1'_)
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x0, attr: ['C0'] },
+ { bits: 3, name: 'rd\'' },
+ { bits: 1, name: 'uimm[1]' },
+ { bits: 1, name: 0x1 },
+ { bits: 3, name: 'rs1\'' },
+ { bits: 3, name: 0x1 },
+ { bits: 3, name: 0x4, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+The immediate offset is formed as follows:
+
+[source,sail]
+----
+ uimm[31:2] = 0;
+ uimm[1] = encoding[5];
+ uimm[0] = 0;
+----
+
+Description:
+
+This instruction loads a halfword from the memory address formed by adding _rs1'_ to the zero extended immediate _uimm_. The resulting halfword is sign extended to XLEN bits and is written to _rd'_.
+
+[NOTE]
+====
+_rd'_ and _rs1'_ are from the standard 8-register set x8-x15.
+====
+
+Prerequisites:
+
+None
+//32-bit equivalent:
+//
+//<<insns-lh>>
+
+Operation:
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+X(rdc) = EXTS(load_mem[X(rs1c)+EXTZ(uimm)][15..0]);
+----
+
+<<<
+[#insns-c_sb,reftext="Store byte, 16-bit encoding"]
+==== c.sb
+
+Synopsis:
+
+Store byte, 16-bit encoding
+
+Mnemonic:
+
+c.sb _rs2'_, _uimm_(_rs1'_)
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x0, attr: ['C0'] },
+ { bits: 3, name: 'rs2\'' },
+ { bits: 2, name: 'uimm[0|1]' },
+ { bits: 3, name: 'rs1\'' },
+ { bits: 3, name: 0x2 },
+ { bits: 3, name: 0x4, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+The immediate offset is formed as follows:
+
+[source,sail]
+----
+ uimm[31:2] = 0;
+ uimm[1] = encoding[5];
+ uimm[0] = encoding[6];
+----
+
+Description:
+
+This instruction stores the least significant byte of _rs2'_ to the memory address formed by adding _rs1'_ to the zero extended immediate _uimm_.
+
+[NOTE]
+====
+_rs1'_ and _rs2'_ are from the standard 8-register set x8-x15.
+====
+
+Prerequisites:
+
+None
+//
+//32-bit equivalent:
+//
+//<<insns-sb>>
+
+Operation:
+
+[source,sail]
+--
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+mem[X(rs1c)+EXTZ(uimm)][7..0] = X(rs2c)
+--
+
+<<<
+[#insns-c_sh,reftext="Store halfword, 16-bit encoding"]
+==== c.sh
+
+Synopsis:
+
+Store halfword, 16-bit encoding
+
+Mnemonic:
+
+c.sh _rs2'_, _uimm_(_rs1'_)
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x0, attr: ['C0'] },
+ { bits: 3, name: 'rs2\'' },
+ { bits: 1, name: 'uimm[1]' },
+ { bits: 1, name: '0' },
+ { bits: 3, name: 'rs1\'' },
+ { bits: 3, name: 0x3 },
+ { bits: 3, name: 0x4, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+The immediate offset is formed as follows:
+
+[source,sail]
+----
+ uimm[31:2] = 0;
+ uimm[1] = encoding[5];
+ uimm[0] = 0;
+----
+
+Description:
+
+This instruction stores the least significant halfword of _rs2'_ to the memory address formed by adding _rs1'_ to the zero extended immediate _uimm_.
+
+[NOTE]
+====
+_rs1'_ and _rs2'_ are from the standard 8-register set x8-x15.
+====
+
+Prerequisites:
+
+None
+//
+//32-bit equivalent:
+//
+//<<insns-sh>>
+
+Operation:
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+mem[X(rs1c)+EXTZ(uimm)][15..0] = X(rs2c)
+----
+
+<<<
+[#insns-c_zext_b,reftext="Zero extend byte, 16-bit encoding"]
+==== c.zext.b
+
+Synopsis:
+
+Zero extend byte, 16-bit encoding
+
+Mnemonic:
+
+c.zext.b _rd'/rs1'_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x1, attr: ['C1'] },
+ { bits: 3, name: 0x0, attr: ['C.ZEXT.B'] },
+ { bits: 2, name: 0x3, attr: ['FUNCT2'] },
+ { bits: 3, name: 'rd\'/rs1\'', attr: ['SRCDST'] },
+ { bits: 3, name: 0x7 },
+ { bits: 3, name: 0x4, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+Description:
+
+This instruction takes a single source/destination operand.
+It zero-extends the least-significant byte of the operand to XLEN bits by inserting zeros into all of
+the bits more significant than 7.
+
+[NOTE]
+====
+_rd'/rs1'_ is from the standard 8-register set x8-x15.
+====
+
+Prerequisites:
+
+None
+
+32-bit equivalent:
+
+[source,sail]
+----
+andi rd'/rs1', rd'/rs1', 0xff
+----
+
+[NOTE]
+====
+The SAIL module variable for _rd'/rs1'_ is called _rsdc_.
+====
+
+Operation:
+
+[source,sail]
+----
+X(rsdc) = EXTZ(X(rsdc)[7..0]);
+----
+
+<<<
+[#insns-c_sext_b,reftext="Sign extend byte, 16-bit encoding"]
+==== c.sext.b
+
+Synopsis:
+
+Sign extend byte, 16-bit encoding
+
+Mnemonic:
+
+c.sext.b _rd'/rs1'_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x1, attr: ['C1'] },
+ { bits: 3, name: 0x1, attr: ['C.SEXT.B'] },
+ { bits: 2, name: 0x3, attr: ['FUNCT2'] },
+ { bits: 3, name: 'rd\'/rs1\'', attr: ['SRCDST'] },
+ { bits: 3, name: 0x7 },
+ { bits: 3, name: 0x4, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+Description:
+
+This instruction takes a single source/destination operand.
+It sign-extends the least-significant byte in the operand to XLEN bits by copying the most-significant bit
+in the byte (i.e., bit 7) to all of the more-significant bits.
+
+[NOTE]
+====
+_rd'/rs1'_ is from the standard 8-register set x8-x15.
+====
+
+Prerequisites:
+
+Zbb is also required.
+//
+//32-bit equivalent:
+//
+//<<insns-sext_b>> from Zbb
+
+[NOTE]
+====
+The SAIL module variable for _rd'/rs1'_ is called _rsdc_.
+====
+
+Operation:
+
+[source,sail]
+----
+X(rsdc) = EXTS(X(rsdc)[7..0]);
+----
+
+Prerequisites:
+
+Zbb is also required.
+//
+//32-bit equivalent:
+//
+//<<insns-zext_h>> from Zbb
+
+[NOTE]
+====
+The SAIL module variable for _rd'/rs1'_ is called _rsdc_.
+====
+
+Operation:
+
+[source,sail]
+----
+X(rsdc) = EXTZ(X(rsdc)[15..0]);
+----
+
+<<<
+[#insns-c_zext_h,reftext="Zero extend halfword, 16-bit encoding"]
+==== c.zext.h
+
+Synopsis:
+
+Zero extend halfword, 16-bit encoding
+
+Mnemonic:
+
+c.zext.h _rd'/rs1'_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x1, attr: ['C1'] },
+ { bits: 3, name: 0x2, attr: ['C.ZEXT.H'] },
+ { bits: 2, name: 0x3, attr: ['FUNCT2'] },
+ { bits: 3, name: 'rd\'/rs1\'', attr: ['SRCDST'] },
+ { bits: 3, name: 0x7 },
+ { bits: 3, name: 0x4, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+Description:
+
+This instruction takes a single source/destination operand.
+It zero-extends the least-significant halfword of the operand to XLEN bits by inserting zeros into all of
+the bits more significant than 15.
+
+[NOTE]
+====
+_rd'/rs1'_ is from the standard 8-register set x8-x15.
+====
+
+Prerequisites:
+
+Zbb is also required.
+//
+//32-bit equivalent:
+//
+//<<insns-zext_h>> from Zbb
+
+[NOTE]
+====
+The SAIL module variable for _rd'/rs1'_ is called _rsdc_.
+====
+
+Operation:
+
+[source,sail]
+----
+X(rsdc) = EXTZ(X(rsdc)[15..0]);
+----
+
+<<<
+[#insns-c_sext_h,reftext="Sign extend halfword, 16-bit encoding"]
+==== c.sext.h
+
+Synopsis:
+
+Sign extend halfword, 16-bit encoding
+
+Mnemonic:
+
+c.sext.h _rd'/rs1'_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x1, attr: ['C1'] },
+ { bits: 3, name: 0x3, attr: ['C.SEXT.H'] },
+ { bits: 2, name: 0x3, attr: ['FUNCT2'] },
+ { bits: 3, name: 'rd\'/rs1\'', attr: ['SRCDST'] },
+ { bits: 3, name: 0x7 },
+ { bits: 3, name: 0x4, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+Description:
+
+This instruction takes a single source/destination operand.
+It sign-extends the least-significant halfword in the operand to XLEN bits by copying the most-significant bit
+in the halfword (i.e., bit 15) to all of the more-significant bits.
+
+[NOTE]
+====
+_rd'/rs1'_ is from the standard 8-register set x8-x15.
+====
+
+Prerequisites:
+
+Zbb is also required.
+//
+//32-bit equivalent:
+//
+//<<insns-sext_h>> from Zbb
+
+[NOTE]
+====
+The SAIL module variable for _rd'/rs1'_ is called _rsdc_.
+====
+
+Operation:
+
+[source,sail]
+----
+X(rsdc) = EXTS(X(rsdc)[15..0]);
+----
+
+<<<
+[#insns-c_zext_w,reftext="Zero extend word, 16-bit encoding"]
+==== c.zext.w
+
+Synopsis:
+
+Zero extend word, 16-bit encoding
+
+Mnemonic:
+
+c.zext.w _rd'/rs1'_
+
+Encoding (RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x1, attr: ['C1'] },
+ { bits: 3, name: 0x4, attr: ['C.ZEXT.W'] },
+ { bits: 2, name: 0x3, attr: ['FUNCT2'] },
+ { bits: 3, name: 'rd\'/rs1\'', attr: ['SRCDST'] },
+ { bits: 3, name: 0x7 },
+ { bits: 3, name: 0x4, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+Description:
+
+This instruction takes a single source/destination operand.
+It zero-extends the least-significant word of the operand to XLEN bits by inserting zeros into all of
+the bits more significant than 31.
+
+[NOTE]
+====
+_rd'/rs1'_ is from the standard 8-register set x8-x15.
+====
+
+Prerequisites:
+
+Zba is also required.
+
+32-bit equivalent:
+
+[source,sail]
+----
+add.uw rd'/rs1', rd'/rs1', zero
+----
+
+[NOTE]
+====
+The SAIL module variable for _rd'/rs1'_ is called _rsdc_.
+====
+
+Operation:
+
+[source,sail]
+----
+X(rsdc) = EXTZ(X(rsdc)[31..0]);
+----
+
+<<<
+[#insns-c_not,reftext="Bitwise not, 16-bit encoding"]
+==== c.not
+
+Synopsis:
+
+Bitwise not, 16-bit encoding
+
+Mnemonic:
+
+c.not _rd'/rs1'_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x1, attr: ['C1'] },
+ { bits: 3, name: 0x5, attr: ['C.NOT'] },
+ { bits: 2, name: 0x3, attr: ['FUNCT2'] },
+ { bits: 3, name: 'rd\'/rs1\'', attr: ['SRCDST'] },
+ { bits: 3, name: 0x7 },
+ { bits: 3, name: 0x4, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+Description:
+
+This instruction takes the one's complement of _rd'/rs1'_ and writes the result to the same register.
+
+[NOTE]
+====
+_rd'/rs1'_ is from the standard 8-register set x8-x15.
+====
+
+Prerequisites:
+
+None
+
+32-bit equivalent:
+
+[source,sail]
+----
+xori rd'/rs1', rd'/rs1', -1
+----
+
+[NOTE]
+====
+The SAIL module variable for _rd'/rs1'_ is called _rsdc_.
+====
+
+Operation:
+
+[source,sail]
+----
+X(rsdc) = X(rsdc) XOR -1;
+----
+
+<<<
+[#insns-c_mul,reftext="Multiply, 16-bit encoding"]
+==== c.mul
+
+Synopsis:
+
+Multiply, 16-bit encoding
+
+Mnemonic:
+
+c.mul _rsd'_, _rs2'_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x1, attr: ['C1'] },
+ { bits: 3, name: 'rs2\'', attr: ['SRC2'] },
+ { bits: 2, name: 0x2, attr: ['FUNCT2'] },
+ { bits: 3, name: 'rd\'/rs1\'', attr: ['SRCDST'] },
+ { bits: 3, name: 0x7 },
+ { bits: 3, name: 0x4, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+Description:
+
+This instruction multiplies XLEN bits of the source operands from _rsd'_ and _rs2'_ and writes the lowest XLEN bits of the result to _rsd'_.
+
+[NOTE]
+====
+_rd'/rs1'_ and _rs2'_ are from the standard 8-register set x8-x15.
+====
+
+Prerequisites:
+
+M or Zmmul must be configured.
+//
+//32-bit equivalent:
+//
+//<<insns-mul>>
+
+[NOTE]
+====
+The SAIL module variable for _rd'/rs1'_ is called _rsdc_, and for _rs2'_ is called _rs2c_.
+====
+
+Operation:
+
+[source,sail]
+----
+let result_wide = to_bits(2 * sizeof(xlen), signed(X(rsdc)) * signed(X(rs2c)));
+X(rsdc) = result_wide[(sizeof(xlen) - 1) .. 0];
+----
+
+<<<
+
+[#insns-pushpop,reftext="PUSH/POP Register Instructions"]
+=== PUSH/POP register instructions
+
+These instructions are collectively referred to as PUSH/POP:
+
+* <<#insns-cm_push>>
+* <<#insns-cm_pop>>
+* <<#insns-cm_popret>>
+* <<#insns-cm_popretz>>
+
+The term PUSH refers to _cm.push_.
+
+The term POP refers to _cm.pop_.
+
+The term POPRET refers to _cm.popret and cm.popretz_.
+
+Common details for these instructions are in this section.
+
+==== PUSH/POP functional overview
+
+PUSH, POP, POPRET are used to reduce the size of function prologues and epilogues.
+
+. The PUSH instruction
+** adjusts the stack pointer to create the stack frame
+** pushes (stores) the registers specified in the register list to the stack frame
+
+. The POP instruction
+** pops (loads) the registers in the register list from the stack frame
+** adjusts the stack pointer to destroy the stack frame
+
+. The POPRET instructions
+** pop (load) the registers in the register list from the stack frame
+** _cm.popretz_ also moves zero into _a0_ as the return value
+** adjust the stack pointer to destroy the stack frame
+** execute a _ret_ instruction to return from the function
+
+<<<
+==== Example usage
+
+This example gives an illustration of the use of PUSH and POPRET.
+
+The example is the function _processMarkers_ from the Embench benchmark picojpeg, which is in the following file on GitHub: https://github.com/embench/embench-iot/blob/master/src/picojpeg/libpicojpeg.c[libpicojpeg.c]
+
+The prologue and epilogue compile with GCC10 to:
+
+[source,SAIL]
+----
+
+ 0001098a <processMarkers>:
+ 1098a: 711d addi sp,sp,-96 ;#cm.push(1)
+ 1098c: c8ca sw s2,80(sp) ;#cm.push(2)
+ 1098e: c6ce sw s3,76(sp) ;#cm.push(3)
+ 10990: c4d2 sw s4,72(sp) ;#cm.push(4)
+ 10992: ce86 sw ra,92(sp) ;#cm.push(5)
+ 10994: cca2 sw s0,88(sp) ;#cm.push(6)
+ 10996: caa6 sw s1,84(sp) ;#cm.push(7)
+ 10998: c2d6 sw s5,68(sp) ;#cm.push(8)
+ 1099a: c0da sw s6,64(sp) ;#cm.push(9)
+ 1099c: de5e sw s7,60(sp) ;#cm.push(10)
+ 1099e: dc62 sw s8,56(sp) ;#cm.push(11)
+ 109a0: da66 sw s9,52(sp) ;#cm.push(12)
+ 109a2: d86a sw s10,48(sp);#cm.push(13)
+ 109a4: d66e sw s11,44(sp);#cm.push(14)
+...
+ 109f4: 4501 li a0,0 ;#cm.popretz(1)
+ 109f6: 40f6 lw ra,92(sp) ;#cm.popretz(2)
+ 109f8: 4466 lw s0,88(sp) ;#cm.popretz(3)
+ 109fa: 44d6 lw s1,84(sp) ;#cm.popretz(4)
+ 109fc: 4946 lw s2,80(sp) ;#cm.popretz(5)
+ 109fe: 49b6 lw s3,76(sp) ;#cm.popretz(6)
+ 10a00: 4a26 lw s4,72(sp) ;#cm.popretz(7)
+ 10a02: 4a96 lw s5,68(sp) ;#cm.popretz(8)
+ 10a04: 4b06 lw s6,64(sp) ;#cm.popretz(9)
+ 10a06: 5bf2 lw s7,60(sp) ;#cm.popretz(10)
+ 10a08: 5c62 lw s8,56(sp) ;#cm.popretz(11)
+ 10a0a: 5cd2 lw s9,52(sp) ;#cm.popretz(12)
+ 10a0c: 5d42 lw s10,48(sp);#cm.popretz(13)
+ 10a0e: 5db2 lw s11,44(sp);#cm.popretz(14)
+ 10a10: 6125 addi sp,sp,96 ;#cm.popretz(15)
+ 10a12: 8082 ret ;#cm.popretz(16)
+----
+
+<<<
+
+with the GCC option _-msave-restore_ the output is the following:
+
+[source,SAIL]
+----
+0001080e <processMarkers>:
+ 1080e: 73a012ef jal t0,11f48 <__riscv_save_12>
+ 10812: 1101 addi sp,sp,-32
+...
+ 10862: 4501 li a0,0
+ 10864: 6105 addi sp,sp,32
+ 10866: 71e0106f j 11f84 <__riscv_restore_12>
+----
+
+with PUSH/POPRET this reduces to
+
+[source,SAIL]
+----
+0001080e <processMarkers>:
+ 1080e: b8fa cm.push {ra,s0-s11},-96
+...
+ 10866: bcfa cm.popretz {ra,s0-s11}, 96
+----
+
+The prologue / epilogue reduce from 60 bytes in the original code, to 14 bytes with _-msave-restore_,
+and to 4 bytes with PUSH and POPRET.
+As well as reducing the code size, PUSH and POPRET eliminate the branches from
+calling the millicode _save/restore_ routines and so may also perform better.
+
+[NOTE]
+====
+The calls to _<riscv_save_0>/<riscv_restore_0>_ become 64-bit when the target functions are out of the ±1MB range, increasing the prologue/epilogue size to 22-bytes.
+====
+
+[NOTE]
+====
+POP is typically used in tail-calling sequences where _ret_ is not used to return to _ra_ after destroying the stack frame.
+====
+
+[#pushpop-areg-list]
+
+===== Stack pointer adjustment handling
+
+The instructions all automatically adjust the stack pointer by enough to cover the memory required for the registers being saved or restored.
+Additionally the _spimm_ field in the encoding allows the stack pointer to be adjusted in additional increments of 16-bytes. There is only a small restricted
+range available in the encoding; if the range is insufficient then a separate _c.addi16sp_ can be used to increase the range.
+
+===== Register list handling
+
+There is no support for the _{ra, s0-s10}_ register list without also adding _s11_. Therefore the _{ra, s0-s11}_ register list must be used in this case.
+
+[#pushpop-idempotent-memory]
+==== PUSH/POP Fault handling
+
+Correct execution requires that _sp_ refers to idempotent memory (also see <<pushpop_non-idem-mem>>), because the core must be able to
+handle traps detected during the sequence.
+The entire PUSH/POP sequence is re-executed after returning from the trap handler, and multiple traps are possible during the sequence.
+
+If a trap occurs during the sequence then _xEPC_ is updated with the PC of the instruction, _xTVAL_ (if not read-only-zero) is updated with the bad address if it was an access fault, and _xCAUSE_ is updated with the type of trap.
+
+NOTE: It is implementation defined whether interrupts can also be taken during the sequence execution.
+
+[#pushpop-software-view]
+==== Software view of execution
+
+===== Software view of the PUSH sequence
+
+From a software perspective the PUSH sequence appears as:
+
+* A sequence of stores writing the bytes required by the pseudo-code
+** The bytes may be written in any order.
+** The bytes may be grouped into larger accesses.
+** Any of the bytes may be written multiple times.
+* A stack pointer adjustment
+
+[NOTE]
+====
+If an implementation allows interrupts during the sequence, and the interrupt handler uses _sp_ to allocate stack memory, then any stores which were executed before the interrupt may be overwritten by the handler. This is safe because the memory is idempotent and the stores will be re-executed when execution resumes.
+====
+
+The stack pointer adjustment must only be committed when it is certain that the entire PUSH instruction will commit.
+
+Stores may also return imprecise faults from the bus.
+It is platform defined whether the core implementation waits for the bus responses before continuing to the final stage of the sequence,
+or handles error responses after completing the PUSH instruction.
+
+<<<
+
+For example:
+
+[source,sail]
+----
+cm.push {ra, s0-s5}, -64
+----
+
+Appears to software as:
+
+[source,sail]
+----
+# any bytes from sp-1 to sp-28 may be written multiple times before
+# the instruction completes therefore these updates may be visible in
+# the interrupt/exception handler below the stack pointer
+sw s5, -4(sp)
+sw s4, -8(sp)
+sw s3,-12(sp)
+sw s2,-16(sp)
+sw s1,-20(sp)
+sw s0,-24(sp)
+sw ra,-28(sp)
+
+# this must only execute once, and will only execute after all stores
+# completed without any precise faults, therefore this update is only
+# visible in the interrupt/exception handler if cm.push has completed
+addi sp, sp, -64
+----
+
+===== Software view of the POP/POPRET sequence
+
+From a software perspective the POP/POPRET sequence appears as:
+
+* A sequence of loads reading the bytes required by the pseudo-code.
+** The bytes may be loaded in any order.
+** The bytes may be grouped into larger accesses.
+** Any of the bytes may be loaded multiple times.
+* A stack pointer adjustment
+* An optional `li a0, 0`
+* An optional `ret`
+
+If a trap occurs during the sequence, then any loads which were executed before the trap may update architectural state.
+The loads will be re-executed once the trap handler completes, so the values will be overwritten.
+Therefore it is permitted for an implementation to update some of the destination registers before taking a fault.
+
+The optional `li a0, 0`, stack pointer adjustment and optional `ret` must only be committed when it is certain that the entire POP/POPRET instruction will commit.
+
+For POPRET once the stack pointer adjustment has been committed the `ret` must execute.
+
+<<<
+For example:
+
+[source,sail]
+----
+cm.popretz {ra, s0-s3}, 32;
+----
+
+Appears to software as:
+
+[source,sail]
+----
+# any or all of these load instructions may execute multiple times
+# therefore these updates may be visible in the interrupt/exception handler
+lw s3, 28(sp)
+lw s2, 24(sp)
+lw s1, 20(sp)
+lw s0, 16(sp)
+lw ra, 12(sp)
+
+# these must only execute once, and will only execute after all loads
+# complete successfully; all instructions must execute atomically,
+# therefore these updates are not visible in the interrupt/exception handler
+li a0, 0
+addi sp, sp, 32
+ret
+----
+
+[[pushpop_non-idem-mem,Non-idempotent memory handling]]
+==== Non-idempotent memory handling
+
+An implementation may have a requirement to issue a PUSH/POP instruction to non-idempotent memory.
+
+If the core implementation does not support PUSH/POP to non-idempotent memories, the core may use an idempotency PMA to detect it and take a
+load (POP/POPRET) or store (PUSH) access fault exception in order to avoid unpredictable results.
+
+Software should only use these instructions on non-idempotent memory regions when software can tolerate the required memory accesses
+being issued repeatedly in the case that they cause exceptions.
+
+<<<
+
+==== Example RV32I PUSH/POP sequences
+
+The examples included show the load/store series expansion and the stack adjustment.
+Examples of _cm.popret_ and _cm.popretz_ are not included, as the difference in the expanded sequence from _cm.pop_ is trivial in all cases.
+
+===== cm.push {ra, s0-s2}, -64
+
+Encoding: _rlist_=7, _spimm_=3
+
+expands to:
+
+[source,sail]
+----
+sw s2, -4(sp);
+sw s1, -8(sp);
+sw s0, -12(sp);
+sw ra, -16(sp);
+addi sp, sp, -64;
+----
+
+===== cm.push {ra, s0-s11}, -112
+
+Encoding: _rlist_=15, _spimm_=3
+
+expands to:
+
+[source,sail]
+----
+sw s11, -4(sp);
+sw s10, -8(sp);
+sw s9, -12(sp);
+sw s8, -16(sp);
+sw s7, -20(sp);
+sw s6, -24(sp);
+sw s5, -28(sp);
+sw s4, -32(sp);
+sw s3, -36(sp);
+sw s2, -40(sp);
+sw s1, -44(sp);
+sw s0, -48(sp);
+sw ra, -52(sp);
+addi sp, sp, -112;
+----
+
+<<<
+
+===== cm.pop {ra}, 16
+
+Encoding: _rlist_=4, _spimm_=0
+
+expands to:
+
+[source,sail]
+----
+lw ra, 12(sp);
+addi sp, sp, 16;
+----
+
+===== cm.pop {ra, s0-s3}, 48
+
+Encoding: _rlist_=8, _spimm_=1
+
+expands to:
+
+[source,sail]
+----
+lw s3, 44(sp);
+lw s2, 40(sp);
+lw s1, 36(sp);
+lw s0, 32(sp);
+lw ra, 28(sp);
+addi sp, sp, 48;
+----
+
+===== cm.pop {ra, s0-s4}, 64
+
+Encoding: _rlist_=9, _spimm_=2
+
+expands to:
+
+[source,sail]
+----
+lw s4, 60(sp);
+lw s3, 56(sp);
+lw s2, 52(sp);
+lw s1, 48(sp);
+lw s0, 44(sp);
+lw ra, 40(sp);
+addi sp, sp, 64;
+----
+
+
+<<<
+[#insns-cm_push,reftext="cm.push"]
+==== cm.push
+
+Synopsis:
+
+Create stack frame: store ra and 0 to 12 saved registers to the stack frame, optionally allocate additional stack space.
+
+Mnemonic:
+
+cm.push _{reg_list}, -stack_adj_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x2, attr: ['C2'] },
+ { bits: 2, name: 'spimm\[5:4\]', attr: [] },
+ { bits: 4, name: 'rlist', attr: [] },
+ { bits: 5, name: 0x18, attr: [] },
+ { bits: 3, name: 0x5, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+[NOTE]
+====
+_rlist_ values 0 to 3 are reserved for a future EABI variant called _cm.push.e_
+====
+
+Assembly Syntax:
+
+[source,sail]
+--
+cm.push {reg_list}, -stack_adj
+cm.push {xreg_list}, -stack_adj
+--
+
+The variables used in the assembly syntax are defined below.
+
+[source,sail]
+----
+RV32E:
+
+switch (rlist){
+ case 4: {reg_list="ra"; xreg_list="x1";}
+ case 5: {reg_list="ra, s0"; xreg_list="x1, x8";}
+ case 6: {reg_list="ra, s0-s1"; xreg_list="x1, x8-x9";}
+ default: reserved();
+}
+stack_adj = stack_adj_base + spimm[5:4] * 16;
+----
+
+[source,sail]
+----
+RV32I, RV64:
+
+switch (rlist){
+ case 4: {reg_list="ra"; xreg_list="x1";}
+ case 5: {reg_list="ra, s0"; xreg_list="x1, x8";}
+ case 6: {reg_list="ra, s0-s1"; xreg_list="x1, x8-x9";}
+ case 7: {reg_list="ra, s0-s2"; xreg_list="x1, x8-x9, x18";}
+ case 8: {reg_list="ra, s0-s3"; xreg_list="x1, x8-x9, x18-x19";}
+ case 9: {reg_list="ra, s0-s4"; xreg_list="x1, x8-x9, x18-x20";}
+ case 10: {reg_list="ra, s0-s5"; xreg_list="x1, x8-x9, x18-x21";}
+ case 11: {reg_list="ra, s0-s6"; xreg_list="x1, x8-x9, x18-x22";}
+ case 12: {reg_list="ra, s0-s7"; xreg_list="x1, x8-x9, x18-x23";}
+ case 13: {reg_list="ra, s0-s8"; xreg_list="x1, x8-x9, x18-x24";}
+ case 14: {reg_list="ra, s0-s9"; xreg_list="x1, x8-x9, x18-x25";}
+ //note - to include s10, s11 must also be included
+ case 15: {reg_list="ra, s0-s11"; xreg_list="x1, x8-x9, x18-x27";}
+ default: reserved();
+}
+stack_adj = stack_adj_base + spimm[5:4] * 16;
+----
+
+[source,sail]
+----
+RV32E:
+
+stack_adj_base = 16;
+Valid values:
+stack_adj = [16|32|48|64];
+----
+
+[source,sail]
+----
+RV32I:
+
+switch (rlist) {
+ case 4.. 7: stack_adj_base = 16;
+ case 8..11: stack_adj_base = 32;
+ case 12..14: stack_adj_base = 48;
+ case 15: stack_adj_base = 64;
+}
+
+Valid values:
+switch (rlist) {
+ case 4.. 7: stack_adj = [16|32|48| 64];
+ case 8..11: stack_adj = [32|48|64| 80];
+ case 12..14: stack_adj = [48|64|80| 96];
+ case 15: stack_adj = [64|80|96|112];
+}
+----
+
+[source,sail]
+----
+RV64:
+
+switch (rlist) {
+ case 4.. 5: stack_adj_base = 16;
+ case 6.. 7: stack_adj_base = 32;
+ case 8.. 9: stack_adj_base = 48;
+ case 10..11: stack_adj_base = 64;
+ case 12..13: stack_adj_base = 80;
+ case 14: stack_adj_base = 96;
+ case 15: stack_adj_base = 112;
+}
+
+Valid values:
+switch (rlist) {
+ case 4.. 5: stack_adj = [ 16| 32| 48| 64];
+ case 6.. 7: stack_adj = [ 32| 48| 64| 80];
+ case 8.. 9: stack_adj = [ 48| 64| 80| 96];
+ case 10..11: stack_adj = [ 64| 80| 96|112];
+ case 12..13: stack_adj = [ 80| 96|112|128];
+ case 14: stack_adj = [ 96|112|128|144];
+ case 15: stack_adj = [112|128|144|160];
+}
+----
+
+<<<
+Description:
+
+This instruction pushes (stores) the registers in _reg_list_ to the memory below the stack pointer,
+and then creates the stack frame by decrementing the stack pointer by _stack_adj_,
+including any additional stack space requested by the value of _spimm_.
+
+
+[NOTE]
+====
+All ABI register mappings are for the UABI. An EABI version is planned once the EABI is frozen.
+====
+
+For further information see <<insns-pushpop>>.
+
+Stack Adjustment Calculation:
+
+_stack_adj_base_ is the minimum number of bytes, in multiples of 16-byte address increments, required to cover the registers in the list.
+
+_spimm_ is the number of additional 16-byte address increments allocated for the stack frame.
+
+The total stack adjustment represents the total size of the stack frame, which is _stack_adj_base_ added to _spimm_ scaled by 16,
+as defined above.
+
+Prerequisites:
+
+None
+
+32-bit equivalent:
+
+No direct equivalent encoding exists
+
+Operation:
+
+The first section of pseudo-code may be executed multiple times before the instruction successfully completes.
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+if (XLEN==32) bytes=4; else bytes=8;
+
+addr=sp-bytes;
+for(i in 27,26,25,24,23,22,21,20,19,18,9,8,1) {
+ //if register i is in xreg_list
+ if (xreg_list[i]) {
+ switch(bytes) {
+ 4: asm("sw x[i], 0(addr)");
+ 8: asm("sd x[i], 0(addr)");
+ }
+ addr-=bytes;
+ }
+}
+----
+
+The final section of pseudo-code executes atomically, and only executes if the section above completes without any exceptions or interrupts.
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+sp-=stack_adj;
+----
+
+<<<
+[#insns-cm_pop,reftext="cm.pop"]
+==== cm.pop
+
+Synopsis:
+
+Destroy stack frame: load ra and 0 to 12 saved registers from the stack frame, deallocate the stack frame.
+
+Mnemonic:
+
+cm.pop _{reg_list}, stack_adj_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x2, attr: ['C2'] },
+ { bits: 2, name: 'spimm\[5:4\]', attr: [] },
+ { bits: 4, name: 'rlist', attr: [] },
+ { bits: 5, name: 0x1a, attr: [] },
+ { bits: 3, name: 0x5, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+[NOTE]
+====
+_rlist_ values 0 to 3 are reserved for a future EABI variant called _cm.pop.e_
+====
+
+Assembly Syntax:
+
+[source,sail]
+----
+cm.pop {reg_list}, stack_adj
+cm.pop {xreg_list}, stack_adj
+----
+
+The variables used in the assembly syntax are defined below.
+
+[source,sail]
+----
+RV32E:
+
+switch (rlist){
+ case 4: {reg_list="ra"; xreg_list="x1";}
+ case 5: {reg_list="ra, s0"; xreg_list="x1, x8";}
+ case 6: {reg_list="ra, s0-s1"; xreg_list="x1, x8-x9";}
+ default: reserved();
+}
+stack_adj = stack_adj_base + spimm[5:4] * 16;
+----
+
+[source,sail]
+----
+RV32I, RV64:
+
+switch (rlist){
+ case 4: {reg_list="ra"; xreg_list="x1";}
+ case 5: {reg_list="ra, s0"; xreg_list="x1, x8";}
+ case 6: {reg_list="ra, s0-s1"; xreg_list="x1, x8-x9";}
+ case 7: {reg_list="ra, s0-s2"; xreg_list="x1, x8-x9, x18";}
+ case 8: {reg_list="ra, s0-s3"; xreg_list="x1, x8-x9, x18-x19";}
+ case 9: {reg_list="ra, s0-s4"; xreg_list="x1, x8-x9, x18-x20";}
+ case 10: {reg_list="ra, s0-s5"; xreg_list="x1, x8-x9, x18-x21";}
+ case 11: {reg_list="ra, s0-s6"; xreg_list="x1, x8-x9, x18-x22";}
+ case 12: {reg_list="ra, s0-s7"; xreg_list="x1, x8-x9, x18-x23";}
+ case 13: {reg_list="ra, s0-s8"; xreg_list="x1, x8-x9, x18-x24";}
+ case 14: {reg_list="ra, s0-s9"; xreg_list="x1, x8-x9, x18-x25";}
+ //note - to include s10, s11 must also be included
+ case 15: {reg_list="ra, s0-s11"; xreg_list="x1, x8-x9, x18-x27";}
+ default: reserved();
+}
+stack_adj = stack_adj_base + spimm[5:4] * 16;
+----
+
+[source,sail]
+----
+RV32E:
+
+stack_adj_base = 16;
+Valid values:
+stack_adj = [16|32|48|64];
+----
+
+[source,sail]
+----
+RV32I:
+
+switch (rlist) {
+ case 4.. 7: stack_adj_base = 16;
+ case 8..11: stack_adj_base = 32;
+ case 12..14: stack_adj_base = 48;
+ case 15: stack_adj_base = 64;
+}
+
+Valid values:
+switch (rlist) {
+ case 4.. 7: stack_adj = [16|32|48| 64];
+ case 8..11: stack_adj = [32|48|64| 80];
+ case 12..14: stack_adj = [48|64|80| 96];
+ case 15: stack_adj = [64|80|96|112];
+}
+----
+
+[source,sail]
+----
+RV64:
+
+switch (rlist) {
+ case 4.. 5: stack_adj_base = 16;
+ case 6.. 7: stack_adj_base = 32;
+ case 8.. 9: stack_adj_base = 48;
+ case 10..11: stack_adj_base = 64;
+ case 12..13: stack_adj_base = 80;
+ case 14: stack_adj_base = 96;
+ case 15: stack_adj_base = 112;
+}
+
+Valid values:
+switch (rlist) {
+ case 4.. 5: stack_adj = [ 16| 32| 48| 64];
+ case 6.. 7: stack_adj = [ 32| 48| 64| 80];
+ case 8.. 9: stack_adj = [ 48| 64| 80| 96];
+ case 10..11: stack_adj = [ 64| 80| 96|112];
+ case 12..13: stack_adj = [ 80| 96|112|128];
+ case 14: stack_adj = [ 96|112|128|144];
+ case 15: stack_adj = [112|128|144|160];
+}
+----
+
+<<<
+
+Description:
+
+This instruction pops (loads) the registers in _reg_list_ from stack memory,
+and then adjusts the stack pointer by _stack_adj_.
+
+[NOTE]
+====
+All ABI register mappings are for the UABI. An EABI version is planned once the EABI is frozen.
+====
+
+For further information see <<insns-pushpop>>.
+
+Stack Adjustment Calculation:
+
+_stack_adj_base_ is the minimum number of bytes, in multiples of 16-byte address increments, required to cover the registers in the list.
+
+_spimm_ is the number of additional 16-byte address increments allocated for the stack frame.
+
+The total stack adjustment represents the total size of the stack frame, which is _stack_adj_base_ added to _spimm_ scaled by 16,
+as defined above.
+
+Prerequisites:
+
+None
+
+32-bit equivalent:
+
+No direct equivalent encoding exists
+
+Operation:
+
+The first section of pseudo-code may be executed multiple times before the instruction successfully completes.
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+if (XLEN==32) bytes=4; else bytes=8;
+
+addr=sp+stack_adj-bytes;
+for(i in 27,26,25,24,23,22,21,20,19,18,9,8,1) {
+ //if register i is in xreg_list
+ if (xreg_list[i]) {
+ switch(bytes) {
+ 4: asm("lw x[i], 0(addr)");
+ 8: asm("ld x[i], 0(addr)");
+ }
+ addr-=bytes;
+ }
+}
+----
+
+The final section of pseudo-code executes atomically, and only executes if the section above completes without any exceptions or interrupts.
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+sp+=stack_adj;
+----
+
+<<<
+[#insns-cm_popretz,reftext="cm.popretz"]
+==== cm.popretz
+
+Synopsis:
+
+Destroy stack frame: load ra and 0 to 12 saved registers from the stack frame, deallocate the stack frame, move zero into a0, return to ra.
+
+Mnemonic:
+
+cm.popretz _{reg_list}, stack_adj_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x2, attr: ['C2'] },
+ { bits: 2, name: 'spimm\[5:4\]', attr: [] },
+ { bits: 4, name: 'rlist', attr: [] },
+ { bits: 5, name: 0x1c, attr: [] },
+ { bits: 3, name: 0x5, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+[NOTE]
+====
+_rlist_ values 0 to 3 are reserved for a future EABI variant called _cm.popretz.e_
+====
+
+Assembly Syntax:
+
+[source,sail]
+----
+cm.popretz {reg_list}, stack_adj
+cm.popretz {xreg_list}, stack_adj
+----
+
+[source,sail]
+----
+RV32E:
+
+switch (rlist){
+ case 4: {reg_list="ra"; xreg_list="x1";}
+ case 5: {reg_list="ra, s0"; xreg_list="x1, x8";}
+ case 6: {reg_list="ra, s0-s1"; xreg_list="x1, x8-x9";}
+ default: reserved();
+}
+stack_adj = stack_adj_base + spimm[5:4] * 16;
+----
+
+[source,sail]
+----
+RV32I, RV64:
+
+switch (rlist){
+ case 4: {reg_list="ra"; xreg_list="x1";}
+ case 5: {reg_list="ra, s0"; xreg_list="x1, x8";}
+ case 6: {reg_list="ra, s0-s1"; xreg_list="x1, x8-x9";}
+ case 7: {reg_list="ra, s0-s2"; xreg_list="x1, x8-x9, x18";}
+ case 8: {reg_list="ra, s0-s3"; xreg_list="x1, x8-x9, x18-x19";}
+ case 9: {reg_list="ra, s0-s4"; xreg_list="x1, x8-x9, x18-x20";}
+ case 10: {reg_list="ra, s0-s5"; xreg_list="x1, x8-x9, x18-x21";}
+ case 11: {reg_list="ra, s0-s6"; xreg_list="x1, x8-x9, x18-x22";}
+ case 12: {reg_list="ra, s0-s7"; xreg_list="x1, x8-x9, x18-x23";}
+ case 13: {reg_list="ra, s0-s8"; xreg_list="x1, x8-x9, x18-x24";}
+ case 14: {reg_list="ra, s0-s9"; xreg_list="x1, x8-x9, x18-x25";}
+ //note - to include s10, s11 must also be included
+ case 15: {reg_list="ra, s0-s11"; xreg_list="x1, x8-x9, x18-x27";}
+ default: reserved();
+}
+stack_adj = stack_adj_base + spimm[5:4] * 16;
+----
+
+[source,sail]
+----
+RV32E:
+
+stack_adj_base = 16;
+Valid values:
+stack_adj = [16|32|48|64];
+----
+
+[source,sail]
+----
+RV32I:
+
+switch (rlist) {
+ case 4.. 7: stack_adj_base = 16;
+ case 8..11: stack_adj_base = 32;
+ case 12..14: stack_adj_base = 48;
+ case 15: stack_adj_base = 64;
+}
+
+Valid values:
+switch (rlist) {
+ case 4.. 7: stack_adj = [16|32|48| 64];
+ case 8..11: stack_adj = [32|48|64| 80];
+ case 12..14: stack_adj = [48|64|80| 96];
+ case 15: stack_adj = [64|80|96|112];
+}
+----
+
+[source,sail]
+----
+RV64:
+
+switch (rlist) {
+ case 4.. 5: stack_adj_base = 16;
+ case 6.. 7: stack_adj_base = 32;
+ case 8.. 9: stack_adj_base = 48;
+ case 10..11: stack_adj_base = 64;
+ case 12..13: stack_adj_base = 80;
+ case 14: stack_adj_base = 96;
+ case 15: stack_adj_base = 112;
+}
+
+Valid values:
+switch (rlist) {
+ case 4.. 5: stack_adj = [ 16| 32| 48| 64];
+ case 6.. 7: stack_adj = [ 32| 48| 64| 80];
+ case 8.. 9: stack_adj = [ 48| 64| 80| 96];
+ case 10..11: stack_adj = [ 64| 80| 96|112];
+ case 12..13: stack_adj = [ 80| 96|112|128];
+ case 14: stack_adj = [ 96|112|128|144];
+ case 15: stack_adj = [112|128|144|160];
+}
+----
+
+<<<
+
+Description:
+
+This instruction pops (loads) the registers in _reg_list_ from stack memory, adjusts the stack pointer by _stack_adj_, moves zero into a0 and then returns to _ra_.
+
+[NOTE]
+====
+All ABI register mappings are for the UABI. An EABI version is planned once the EABI is frozen.
+====
+
+For further information see <<insns-pushpop>>.
+
+Stack Adjustment Calculation:
+
+_stack_adj_base_ is the minimum number of bytes, in multiples of 16-byte address increments, required to cover the registers in the list.
+
+_spimm_ is the number of additional 16-byte address increments allocated for the stack frame.
+
+The total stack adjustment represents the total size of the stack frame, which is _stack_adj_base_ added to _spimm_ scaled by 16, as defined above.
+
+Prerequisites:
+
+None
+
+32-bit equivalent:
+
+No direct equivalent encoding exists
+
+
+Operation:
+
+The first section of pseudo-code may be executed multiple times before the instruction successfully completes.
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+if (XLEN==32) bytes=4; else bytes=8;
+
+addr=sp+stack_adj-bytes;
+for(i in 27,26,25,24,23,22,21,20,19,18,9,8,1) {
+ //if register i is in xreg_list
+ if (xreg_list[i]) {
+ switch(bytes) {
+ 4: asm("lw x[i], 0(addr)");
+ 8: asm("ld x[i], 0(addr)");
+ }
+ addr-=bytes;
+ }
+}
+----
+
+The final section of pseudo-code executes atomically, and only executes if the section above completes without any exceptions or interrupts.
+
+[NOTE]
+====
+The _li a0, 0_ *could* be executed more than once, but is included in the atomic section for convenience.
+====
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+asm("li a0, 0");
+sp+=stack_adj;
+asm("ret");
+----
+
+<<<
+[#insns-cm_popret,reftext="cm.popret"]
+==== cm.popret
+
+Synopsis:
+
+Destroy stack frame: load ra and 0 to 12 saved registers from the stack frame, deallocate the stack frame, return to ra.
+
+Mnemonic:
+
+cm.popret _{reg_list}, stack_adj_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x2, attr: ['C2'] },
+ { bits: 2, name: 'spimm\[5:4\]', attr: [] },
+ { bits: 4, name: 'rlist', attr: [] },
+ { bits: 5, name: 0x1e, attr: [] },
+ { bits: 3, name: 0x5, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+[NOTE]
+====
+_rlist_ values 0 to 3 are reserved for a future EABI variant called _cm.popret.e_
+====
+
+Assembly Syntax:
+
+[source,sail]
+----
+cm.popret {reg_list}, stack_adj
+cm.popret {xreg_list}, stack_adj
+----
+
+The variables used in the assembly syntax are defined below.
+
+[source,sail]
+----
+RV32E:
+
+switch (rlist){
+ case 4: {reg_list="ra"; xreg_list="x1";}
+ case 5: {reg_list="ra, s0"; xreg_list="x1, x8";}
+ case 6: {reg_list="ra, s0-s1"; xreg_list="x1, x8-x9";}
+ default: reserved();
+}
+stack_adj = stack_adj_base + spimm[5:4] * 16;
+----
+
+[source,sail]
+----
+RV32I, RV64:
+
+switch (rlist){
+ case 4: {reg_list="ra"; xreg_list="x1";}
+ case 5: {reg_list="ra, s0"; xreg_list="x1, x8";}
+ case 6: {reg_list="ra, s0-s1"; xreg_list="x1, x8-x9";}
+ case 7: {reg_list="ra, s0-s2"; xreg_list="x1, x8-x9, x18";}
+ case 8: {reg_list="ra, s0-s3"; xreg_list="x1, x8-x9, x18-x19";}
+ case 9: {reg_list="ra, s0-s4"; xreg_list="x1, x8-x9, x18-x20";}
+ case 10: {reg_list="ra, s0-s5"; xreg_list="x1, x8-x9, x18-x21";}
+ case 11: {reg_list="ra, s0-s6"; xreg_list="x1, x8-x9, x18-x22";}
+ case 12: {reg_list="ra, s0-s7"; xreg_list="x1, x8-x9, x18-x23";}
+ case 13: {reg_list="ra, s0-s8"; xreg_list="x1, x8-x9, x18-x24";}
+ case 14: {reg_list="ra, s0-s9"; xreg_list="x1, x8-x9, x18-x25";}
+ //note - to include s10, s11 must also be included
+ case 15: {reg_list="ra, s0-s11"; xreg_list="x1, x8-x9, x18-x27";}
+ default: reserved();
+}
+stack_adj = stack_adj_base + spimm[5:4] * 16;
+----
+
+[source,sail]
+----
+RV32E:
+
+stack_adj_base = 16;
+Valid values:
+stack_adj = [16|32|48|64];
+----
+
+[source,sail]
+----
+RV32I:
+
+switch (rlist) {
+ case 4.. 7: stack_adj_base = 16;
+ case 8..11: stack_adj_base = 32;
+ case 12..14: stack_adj_base = 48;
+ case 15: stack_adj_base = 64;
+}
+
+Valid values:
+switch (rlist) {
+ case 4.. 7: stack_adj = [16|32|48| 64];
+ case 8..11: stack_adj = [32|48|64| 80];
+ case 12..14: stack_adj = [48|64|80| 96];
+ case 15: stack_adj = [64|80|96|112];
+}
+----
+
+[source,sail]
+----
+RV64:
+
+switch (rlist) {
+ case 4.. 5: stack_adj_base = 16;
+ case 6.. 7: stack_adj_base = 32;
+ case 8.. 9: stack_adj_base = 48;
+ case 10..11: stack_adj_base = 64;
+ case 12..13: stack_adj_base = 80;
+ case 14: stack_adj_base = 96;
+ case 15: stack_adj_base = 112;
+}
+
+Valid values:
+switch (rlist) {
+ case 4.. 5: stack_adj = [ 16| 32| 48| 64];
+ case 6.. 7: stack_adj = [ 32| 48| 64| 80];
+ case 8.. 9: stack_adj = [ 48| 64| 80| 96];
+ case 10..11: stack_adj = [ 64| 80| 96|112];
+ case 12..13: stack_adj = [ 80| 96|112|128];
+ case 14: stack_adj = [ 96|112|128|144];
+ case 15: stack_adj = [112|128|144|160];
+}
+----
+
+<<<
+
+Description:
+
+This instruction pops (loads) the registers in _reg_list_ from stack memory, adjusts the stack pointer by _stack_adj_ and then returns to _ra_.
+
+[NOTE]
+====
+All ABI register mappings are for the UABI. An EABI version is planned once the EABI is frozen.
+====
+
+For further information see <<insns-pushpop>>.
+
+Stack Adjustment Calculation:
+
+_stack_adj_base_ is the minimum number of bytes, in multiples of 16-byte address increments, required to cover the registers in the list.
+
+_spimm_ is the number of additional 16-byte address increments allocated for the stack frame.
+
+The total stack adjustment represents the total size of the stack frame, which is _stack_adj_base_ added to _spimm_ scaled by 16, as defined above.
+
+Prerequisites:
+
+None
+
+32-bit equivalent:
+
+No direct equivalent encoding exists
+
+Operation:
+
+The first section of pseudo-code may be executed multiple times before the instruction successfully completes.
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+if (XLEN==32) bytes=4; else bytes=8;
+
+addr=sp+stack_adj-bytes;
+for(i in 27,26,25,24,23,22,21,20,19,18,9,8,1) {
+ //if register i is in xreg_list
+ if (xreg_list[i]) {
+ switch(bytes) {
+ 4: asm("lw x[i], 0(addr)");
+ 8: asm("ld x[i], 0(addr)");
+ }
+ addr-=bytes;
+ }
+}
+----
+
+The final section of pseudo-code executes atomically, and only executes if the section above completes without any exceptions or interrupts.
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+sp+=stack_adj;
+asm("ret");
+----
+
+<<<
+
+[#insns-cm_mvsa01,reftext="Move a0-a1 into two different s0-s7 registers"]
+==== cm.mvsa01
+
+Synopsis:
+
+Move a0-a1 into two registers of s0-s7
+
+Mnemonic:
+
+cm.mvsa01 _r1s'_, _r2s'_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x2, attr: ['C2'] },
+ { bits: 3, name: 'r2s\'', attr: [] },
+ { bits: 2, name: 0x1, attr: [] },
+ { bits: 3, name: 'r1s\'', attr: [] },
+ { bits: 3, name: 0x3, attr: [] },
+ { bits: 3, name: 0x5, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+[NOTE]
+====
+For the encoding to be legal _r1s'_ != _r2s'_.
+====
+
+Assembly Syntax:
+
+[source,sail]
+----
+cm.mvsa01 r1s', r2s'
+----
+
+Description:
+
+This instruction moves _a0_ into _r1s'_ and _a1_ into _r2s'_. _r1s'_ and _r2s'_ must be different.
+The execution is atomic, so it is not possible to observe state where only one of _r1s'_ or _r2s'_ has been updated.
+
+The encoding uses _sreg_ number specifiers instead of _xreg_ number specifiers to save encoding space.
+The mapping between them is specified in the pseudo-code below.
+
+[NOTE]
+====
+The _s_ register mapping is taken from the UABI, and may not match the currently unratified EABI. _cm.mvsa01.e_ may be included in the future.
+====
+
+Prerequisites:
+
+None
+
+32-bit equivalent:
+
+No direct equivalent encoding exists.
+
+Operation:
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+if (RV32E && (r1sc>1 || r2sc>1)) {
+ reserved();
+}
+xreg1 = {r1sc[2:1]>0,r1sc[2:1]==0,r1sc[2:0]};
+xreg2 = {r2sc[2:1]>0,r2sc[2:1]==0,r2sc[2:0]};
+X[xreg1] = X[10];
+X[xreg2] = X[11];
+----
+
+<<<
+
+[#insns-cm_mva01s,reftext="Move two s0-s7 registers into a0-a1"]
+==== cm.mva01s
+
+Synopsis:
+
+Move two s0-s7 registers into a0-a1
+
+Mnemonic:
+
+cm.mva01s _r1s'_, _r2s'_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x2, attr: ['C2'] },
+ { bits: 3, name: 'r2s\'', attr: [] },
+ { bits: 2, name: 0x3, attr: [] },
+ { bits: 3, name: 'r1s\'', attr: [] },
+ { bits: 3, name: 0x3, attr: [] },
+ { bits: 3, name: 0x5, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+Assembly Syntax:
+
+[source,sail]
+----
+cm.mva01s r1s', r2s'
+----
+
+Description:
+
+This instruction moves _r1s'_ into _a0_ and _r2s'_ into _a1_.
+The execution is atomic, so it is not possible to observe state where only one of _a0_ or _a1_ has been updated.
+
+The encoding uses _sreg_ number specifiers instead of _xreg_ number specifiers to save encoding space.
+The mapping between them is specified in the pseudo-code below.
+
+[NOTE]
+====
+The _s_ register mapping is taken from the UABI, and may not match the currently unratified EABI. _cm.mva01s.e_ may be included in the future.
+====
+
+Prerequisites:
+
+None
+
+32-bit equivalent:
+
+No direct equivalent encoding exists.
+
+Operation:
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+if (RV32E && (r1sc>1 || r2sc>1)) {
+ reserved();
+}
+xreg1 = {r1sc[2:1]>0,r1sc[2:1]==0,r1sc[2:0]};
+xreg2 = {r2sc[2:1]>0,r2sc[2:1]==0,r2sc[2:0]};
+X[10] = X[xreg1];
+X[11] = X[xreg2];
+----
+
+<<<
+
+[#insns-tablejump,reftext="Table Jump Overview"]
+=== Table Jump Overview
+
+_cm.jt_ (<<#insns-cm_jt>>) and _cm.jalt_ (<<#insns-cm_jalt>>) are referred to as table jump.
+
+Table jump uses a 256-entry XLEN wide table in instruction memory to contain function addresses.
+The table must be a minimum of 64-byte aligned.
+
+Table entries follow the current data endianness. This is different from normal instruction fetch which is always little-endian.
+
+_cm.jt_ and _cm.jalt_ encodings index the table, giving access to functions within the full XLEN wide address space.
+
+This is used as a form of dictionary compression to reduce the code size of _jal_ / _auipc+jalr_ / _jr_ / _auipc+jr_ instructions.
+
+Table jump allows the linker to replace the following instruction sequences with a _cm.jt_ or _cm.jalt_ encoding, and an entry in the table:
+
+* 32-bit _j_ calls
+* 32-bit _jal_ ra calls
+* 64-bit _auipc+jr_ calls to fixed locations
+* 64-bit _auipc+jalr ra_ calls to fixed locations
+** The _auipc+jr/jalr_ sequence is used because the offset from the PC is out of the ±1MB range.
+
+If a return address stack is implemented, then as _cm.jalt_ is equivalent to _jal ra_, it pushes to the stack.
+
+==== JVT
+
+The base of the table is in the JVT CSR (see <<csrs-jvt>>), each table entry is XLEN bits.
+
+If the same function is called with and without linking then it must have two entries in the table.
+This is typically caused by the same function being called with and without tail calling.
+
+[#tablejump-fault-handling]
+==== Table Jump Fault handling
+
+For a table jump instruction, the table entry that the instruction selects is considered an extension of the instruction itself.
+Hence, the execution of a table jump instruction involves two instruction fetches, the first to read the instruction (_cm.jt_/_cm.jalt_)
+and the second to read from the jump vector table (JVT). Both instruction fetches are _implicit_ reads, and both require
+execute permission; read permission is irrelevant. It is recommended that the second fetch be ignored for hardware triggers and breakpoints.
+
+Memory writes to the jump vector table require an instruction barrier (_fence.i_) to guarantee that they are visible to the instruction fetch.
+
+Multiple contexts may have different jump vector tables. JVT may be switched between them without an instruction barrier
+if the tables have not been updated in memory since the last _fence.i_.
+
+If an exception occurs on either instruction fetch, xEPC is set to the PC of the table jump instruction, xCAUSE is set as expected for the type of fault and xTVAL (if not set to zero) contains the fetch address which caused the fault.
+
+<<<
+[#csrs-jvt,reftext="JVT CSR, table jump base vector and control register"]
+==== JVT CSR
+
+Synopsis:
+
+Table jump base vector and control register
+
+Address:
+
+0x0017
+
+Permissions:
+
+URW
+
+Format (RV32):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 6, name: 'mode', attr: ['6'] },
+ { bits: 26, name: 'base[XLEN-1:6] (WARL)', attr: ['XLEN-6'] },
+],config:{bits:32}}
+....
+
+Format (RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 6, name: 'mode', attr: ['6'] },
+ { bits: 58, name: 'base[XLEN-1:6] (WARL)', attr: ['XLEN-6'] },
+],config:{bits:64}}
+....
+
+Description:
+
+The _JVT_ register is an XLEN-bit *WARL* read/write register that holds the jump table configuration, consisting of the jump table base address (BASE) and the jump table mode (MODE).
+
+If <<Zcmt>> is implemented then _JVT_ must also be implemented, but can contain a read-only value. If _JVT_ is writable, the set of values the register may hold can vary by implementation. The value in the BASE field must always be aligned on a 64-byte boundary.
+
+_JVT.base_ is a virtual address, whenever virtual memory is enabled.
+
+The memory pointed to by _JVT.base_ is treated as instruction memory for the purpose of executing table jump instructions, implying execute access permission.
+
+[#JVT-config-table]
+._JVT.mode_ definition
+[width="60%",options=header]
+|=============================================================================================
+| JVT.mode | Comment
+| 000000 | Jump table mode
+| others | *reserved for future standard use*
+|=============================================================================================
+
+_JVT.mode_ is a *WARL* field, so can only be programmed to modes which are implemented. Therefore the discovery mechanism is to
+attempt to program different modes and read back the values to see which are available. Jump table mode _must_ be implemented.
+
+[NOTE]
+====
+In the future, the RISC-V Unified Discovery method will report the available modes.
+====
+
+Architectural State:
+
+_JVT_ adds architectural state to the system software context (such as an OS process), and therefore must be saved/restored on context switches.
+
+State Enable:
+
+If the Smstateen extension is implemented, then bit 2 in _mstateen0_, _sstateen0_, and _hstateen0_ is implemented. If bit 2 of a controlling _stateen0_ CSR is zero, then access to the _JVT_ CSR and execution of a _cm.jalt_ or _cm.jt_ instruction by a lower privilege level results in an Illegal Instruction trap (or, if appropriate, a Virtual Instruction trap).
+
+<<<
+[#insns-cm_jt,reftext="Jump via table"]
+==== cm.jt
+
+Synopsis:
+
+jump via table
+
+Mnemonic:
+
+cm.jt _index_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x2, attr: ['C2'] },
+ { bits: 8, name: 'index', attr: [] },
+ { bits: 3, name: 0x0, attr: [] },
+ { bits: 3, name: 0x5, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+[NOTE]
+====
+This encoding decodes as _cm.jt_ only when _index<32_; otherwise it decodes as _cm.jalt_, see <<insns-cm_jalt>>.
+====
+
+[NOTE]
+====
+If JVT.mode = 0 (Jump Table Mode) then _cm.jt_ behaves as specified here. If JVT.mode is a reserved value, then _cm.jt_ is also reserved. In the future other defined values of JVT.mode may change the behaviour of _cm.jt_.
+====
+
+Assembly Syntax:
+
+[source,sail]
+----
+cm.jt index
+----
+
+Description:
+
+_cm.jt_ reads an entry from the jump vector table in memory and jumps to the address that was read.
+
+For further information see <<insns-tablejump>>.
+
+Prerequisites:
+
+None
+
+32-bit equivalent:
+
+No direct equivalent encoding exists.
+
+<<<
+
+[#insns-cm_jt-SAIL,reftext="cm.jt SAIL code"]
+Operation:
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+# target_address is temporary internal state, it doesn't represent a real register
+# InstMemory is byte indexed
+
+switch(XLEN) {
+ 32: table_address[XLEN-1:0] = JVT.base + (index<<2);
+ 64: table_address[XLEN-1:0] = JVT.base + (index<<3);
+}
+
+//fetch from the jump table
+target_address[XLEN-1:0] = InstMemory[table_address][XLEN-1:0];
+
+j target_address[XLEN-1:0]&~0x1;
+
+----
+
+<<<
+[#insns-cm_jalt,reftext="Jump and link via table"]
+==== cm.jalt
+
+Synopsis:
+
+jump via table with optional link
+
+Mnemonic:
+
+cm.jalt _index_
+
+Encoding (RV32, RV64):
+
+[wavedrom, , svg]
+....
+{reg:[
+ { bits: 2, name: 0x2, attr: ['C2'] },
+ { bits: 8, name: 'index', attr: [] },
+ { bits: 3, name: 0x0, attr: [] },
+ { bits: 3, name: 0x5, attr: ['FUNCT3'] },
+],config:{bits:16}}
+....
+
+[NOTE]
+====
+This encoding decodes as _cm.jalt_ only when _index>=32_; otherwise it decodes as _cm.jt_, see <<insns-cm_jt>>.
+====
+
+[NOTE]
+====
+If JVT.mode = 0 (Jump Table Mode) then _cm.jalt_ behaves as specified here. If JVT.mode is a reserved value, then _cm.jalt_ is also reserved. In the future other defined values of JVT.mode may change the behaviour of _cm.jalt_.
+====
+
+Assembly Syntax:
+
+[source,sail]
+----
+cm.jalt index
+----
+
+Description:
+
+_cm.jalt_ reads an entry from the jump vector table in memory and jumps to the address that was read, linking to _ra_.
+
+For further information see <<insns-tablejump>>.
+
+Prerequisites:
+
+None
+
+32-bit equivalent:
+
+No direct equivalent encoding exists.
+
+<<<
+
+[#insns-cm_jalt-SAIL,reftext="cm.jalt SAIL code"]
+Operation:
+
+[source,sail]
+----
+//This is not SAIL, it's pseudo-code. The SAIL hasn't been written yet.
+
+# target_address is temporary internal state, it doesn't represent a real register
+# InstMemory is byte indexed
+
+switch(XLEN) {
+ 32: table_address[XLEN-1:0] = JVT.base + (index<<2);
+ 64: table_address[XLEN-1:0] = JVT.base + (index<<3);
+}
+
+//fetch from the jump table
+target_address[XLEN-1:0] = InstMemory[table_address][XLEN-1:0];
+
+jal ra, target_address[XLEN-1:0]&~0x1;
+
+----
+
+
+
diff --git a/src/zfa.adoc b/src/zfa.adoc
index c252c6d..6c58dbd 100644
--- a/src/zfa.adoc
+++ b/src/zfa.adoc
@@ -1,11 +1,5 @@
[[zfa]]
-== "Zfa" Standard Extension for Additional Floating-Point Instructions, Version 0.1
-
-[WARNING]
-====
-This draft specification may change before being accepted as
-standard by RISC-V International.
-====
+== "Zfa" Standard Extension for Additional Floating-Point Instructions, Version 1.0
This chapter describes the Zfa standard extension, which adds
instructions for immediate loads, IEEE 754-2019 minimum and maximum
@@ -242,4 +236,4 @@ with instruction bit 14 set to 1.
We do not expect analogous comparison instructions will be added to the
vector ISA, since they can be reasonably efficiently emulated using
masking.
-==== \ No newline at end of file
+====
diff --git a/src/zfh.adoc b/src/zfh.adoc
index 72f9358..9e8710e 100644
--- a/src/zfh.adoc
+++ b/src/zfh.adoc
@@ -41,7 +41,7 @@ written to _rd_, whereas FSH ignores all but the lower 16 bits in _rs2_.
=== Half-Precision Computational Instructions
A new supported format is added to the format field of most
-instructions, as shown in Table <<tab:fpextfmth>>.
+instructions, as shown in <<tab:fpextfmth>>.
[[tab:fpextfmth]]
.Format field encoding.
@@ -91,7 +91,7 @@ floating-point number to a quad-precision floating-point number, or
vice-versa, respectively.
include::images/wavedrom/half-prec-flpt-to-flpt-conv.adoc[]
-[half-prec-flpt-to-flpt-conv]
+[[half-prec-flpt-to-flpt-conv]]
Floating-point to floating-point sign-injection instructions, FSGNJ.H,
FSGNJN.H, and FSGNJX.H are defined analogously to the single-precision
@@ -172,4 +172,4 @@ converting to double-precision. If the D extension is not present and a
1-ulp error under RNE or RMM is tolerable, 32-bit integers can be first
converted to single-precision instead. The same remark applies to
conversions from 64-bit integers without the Q extension.
-==== \ No newline at end of file
+====
diff --git a/src/zfinx.adoc b/src/zfinx.adoc
index f3ceabe..718f124 100644
--- a/src/zfinx.adoc
+++ b/src/zfinx.adoc
@@ -144,4 +144,4 @@ is implemented.
====
A future discoverability mechanism might be used to probe the existence
of the Zfinx, Zhinx, and Zdinx extensions.
-==== \ No newline at end of file
+====
diff --git a/src/zicsr.adoc b/src/zicsr.adoc
index 7edd333..50183a8 100644
--- a/src/zicsr.adoc
+++ b/src/zicsr.adoc
@@ -38,27 +38,27 @@ of the CSR, zero-extends the value to XLEN bits, and writes it to
integer register _rd_. The initial value in integer register _rs1_ is
treated as a bit mask that specifies bit positions to be set in the CSR.
Any bit that is high in _rs1_ will cause the corresponding bit to be set
-in the CSR, if that CSR bit is writable. Other bits in the CSR are not
-explicitly written.
+in the CSR, if that CSR bit is writable.
The CSRRC (Atomic Read and Clear Bits in CSR) instruction reads the
value of the CSR, zero-extends the value to XLEN bits, and writes it to
integer register _rd_. The initial value in integer register _rs1_ is
treated as a bit mask that specifies bit positions to be cleared in the
CSR. Any bit that is high in _rs1_ will cause the corresponding bit to
-be cleared in the CSR, if that CSR bit is writable. Other bits in the
-CSR are not explicitly written.
+be cleared in the CSR, if that CSR bit is writable.
For both CSRRS and CSRRC, if _rs1_=`x0`, then the instruction will not
write to the CSR at all, and so shall not cause any of the side effects
-that might otherwise occur on a CSR write, nor raise illegal instruction
+that might otherwise occur on a CSR write, nor raise illegal-instruction
exceptions on accesses to read-only CSRs. Both CSRRS and CSRRC always
read the addressed CSR and cause any read side effects regardless of
-_rs1_ and _rd_ fields. Note that if _rs1_ specifies a register holding a
-zero value other than `x0`, the instruction will still attempt to write
-the unmodified value back to the CSR and will cause any attendant side
-effects. A CSRRW with _rs1_=`x0` will attempt to write zero to the
-destination CSR.
+_rs1_ and _rd_ fields.
+Note that if _rs1_ specifies a register other than `x0`, and that register
+holds a zero value, the instruction will not action any attendant per-field
+side effects, but will action any side effects caused by writing to the entire
+CSR.
+
+A CSRRW with _rs1_=`x0` will attempt to write zero to the destination CSR.
The CSRRWI, CSRRSI, and CSRRCI variants are similar to CSRRW, CSRRS, and
CSRRC respectively, except they update the CSR using an XLEN-bit value
@@ -67,7 +67,7 @@ encoded in the _rs1_ field instead of a value from an integer register.
For CSRRSI and CSRRCI, if the uimm[4:0] field is zero, then these
instructions will not write to the CSR, and shall not cause any of the
side effects that might otherwise occur on a CSR write, nor raise
-illegal instruction exceptions on accesses to read-only CSRs. For
+illegal-instruction exceptions on accesses to read-only CSRs. For
CSRRWI, if _rd_=`x0`, then the instruction shall not read the CSR and
shall not cause any of the side effects that might occur on a CSR read.
Both CSRRSI and CSRRCI will always read the CSR and cause any read side
@@ -105,6 +105,21 @@ CSR
<<csrsideeffects>> summarizes the behavior of the CSR
instructions with respect to whether they read and/or write the CSR.
+In addition to side effects that occur as a consequence of reading or
+writing a CSR, individual fields within a CSR might have side effects
+when written. The CSRRW[I] instructions action side effects for all
+such fields within the written CSR. The CSRRS[I] and CSRRC[I] instructions
+only action side effects for fields for which the _rs1_ or _uimm_ argument
+has at least one bit set corresponding to that field.
+[NOTE]
+====
+As of this writing, no standard CSRs have side effects on field writes.
+Hence, whether a standard CSR access has any side effects can be determined
+solely from the opcode.
+
+Defining CSRs with side effects on field writes is not recommended.
+====
+
For any event or consequence that occurs due to a CSR having a
particular value, if a write to the CSR gives it that value, the
resulting event or consequence is said to be an _indirect effect_ of the
@@ -218,4 +233,4 @@ ordered CSRs and accesses to memory-mapped I/O regions.
====
The rules for the reordering of CSR accesses in the global memory order
should probably be moved to <<memorymodel>> concerning the RVWMO memory consistency model.
-==== \ No newline at end of file
+====
diff --git a/src/zimop.adoc b/src/zimop.adoc
new file mode 100644
index 0000000..620aae3
--- /dev/null
+++ b/src/zimop.adoc
@@ -0,0 +1,122 @@
+[[zimop]]
+== "Zimop" May-Be-Operations Extension, Version 0.1
+
+[WARNING]
+====
+*Warning! This stable specification may change before being accepted as
+a standard by RISC-V International.*
+====
+
+This chapter defines the "Zimop" extension, which introduces the concept of
+instructions that _may be operations_ (MOPs). MOPs are initially defined to
+simply write zero to `x[rd]`, but are designed to be redefined by later
+extensions to perform some other action.
+The Zimop extension defines an encoding space for 40 MOPs.
+
+[NOTE]
+====
+It is sometimes desirable to define instruction-set extensions whose
+instructions, rather than raising illegal-instruction exceptions when the extension is
+not implemented, take no useful action (beyond writing `x[rd]`).
+For example, programs with control-flow integrity checks can
+execute correctly on implementations without the corresponding extension,
+provided the checks are simply ignored. Implementing these checks as MOPs
+allows the same programs to run on implementations with or without the
+corresponding extension.
+
+Although similar in some respects to HINTs, MOPs cannot be encoded as HINTs,
+because unlike HINTs, MOPs are allowed to alter architectural state.
+
+Because MOPs may be redefined by later extensions, standard software should
+not execute a MOP unless it is deliberately targeting an extension that has
+redefined that MOP.
+====
+
+The Zimop extension defines 32 MOP instructions named MOP.R.__n__, where
+__n__ is an integer between 0 and 31, inclusive.
+Unless redefined by another extension, these instructions simply write 0 to
+`x[rd]`. Their encoding allows future extensions to define them to read `x[rs1]`,
+as well as write `x[rd]`.
+
+include::images/wavedrom/mop-r.adoc[]
+[[mop-r]]
+
+The Zimop extension additionally defines 8 MOP instructions named
+MOP.RR.__n__, where __n__ is an integer between 0 and 7, inclusive.
+Unless redefined by another extension, these instructions simply
+write 0 to `x[rd]`. Their encoding allows future extensions to define them to
+read `x[rs1]` and `x[rs2]`, as well as write `x[rd]`.
+
+include::images/wavedrom/mop-rr.adoc[]
+[[mop-rr]]
+
+NOTE: The recommended assembly syntax for MOP.R.__n__ is MOP.R.__n__ rd, rs1,
+with any `x`-register specifier being valid for either argument. Similarly for
+MOP.RR.__n__, the recommended syntax is MOP.RR.__n__ rd, rs1, rs2.
+The extension that redefines a MOP may define an alternate assembly mnemonic.
+
+NOTE: These MOPs are encoded in the SYSTEM major opcode in part because it is
+expected their behavior will be modulated by privileged CSR state.
+
+NOTE: These MOPs are defined to write zero to `x[rd]`, rather than performing
+no operation, to simplify instruction decoding and to allow testing the
+presence of features by branching on the zeroness of the result.
+
+The MOPs defined in the Zimop extension do not carry a syntactic dependency
+from `x[rs1]` or `x[rs2]` to `x[rd]`, though an extension that redefines the
+MOP may impose such a requirement.
+
+NOTE: Not carrying a syntactic dependency relieves straightforward
+implementations of reading `x[rs1]` and `x[rs2]`.
+
+=== "Zcmop" Compressed May-Be-Operations Extension, Version 0.2
+
+[WARNING]
+====
+*Warning! This stable specification may change before being accepted as
+a standard by RISC-V International.*
+====
+
+This section defines the "Zcmop" extension, which defines eight 16-bit MOP
+instructions named C.MOP.__n__, where __n__ is an odd integer between 1 and
+15, inclusive. C.MOP.__n__ is encoded in the reserved encoding space
+corresponding to C.LUI x__n__, 0, as shown in <<tab:c-mop>>.
+Unlike the MOPs defined in the Zimop extension, the C.MOP.__n__ instructions
+are defined to _not_ write any register.
+Their encoding allows future extensions to define them to read register
+`x[__n__]`.
+
+The Zcmop extension requires the Zca extension.
+
+include::images/wavedrom/c-mop.adoc[]
+[[c-mop]]
+
+NOTE: Very few suitable 16-bit encoding spaces exist. This space was chosen
+because it already has unusual behavior with respect to the `rd`/`rs1`
+field--it encodes `c.addi16sp` when the field contains `x2`--and is
+therefore of lower value for most purposes.
+
+[[tab:c-mop]]
+.C.MOP.__n__ instruction encoding.
+
+|===
+|Mnemonic | Encoding | Redefinable to read register
+
+|C.MOP.1 | `0110000010000001` | `x1`
+|C.MOP.3 | `0110000110000001` | `x3`
+|C.MOP.5 | `0110001010000001` | `x5`
+|C.MOP.7 | `0110001110000001` | `x7`
+|C.MOP.9 | `0110010010000001` | `x9`
+|C.MOP.11 | `0110010110000001` | `x11`
+|C.MOP.13 | `0110011010000001` | `x13`
+|C.MOP.15 | `0110011110000001` | `x15`
+|===
+
+NOTE: The recommended assembly syntax for C.MOP.__n__ is simply the nullary
+C.MOP.__n__. The possibly accessed register is implicitly `x__n__`.
+
+NOTE: The expectation is that each Zcmop instruction is equivalent to some
+Zimop instruction, but the choice of expansion (if any) is left to the
+extension that redefines the MOP.
+Note, a Zcmop instruction that does not write a value can expand into a write
+to `x0`.