From ab6f8c9bd7bc85361fcf35667d1fddfaf367a53f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 1 Feb 2017 20:41:47 -0800 Subject: Reorganize directory structure --- src/a.tex | 379 ++ src/assembly.tex | 118 + src/b.tex | 19 + src/bbding.sty | 158 + src/c.tex | 1162 +++++ src/calling.tex | 198 + src/cfgstr.tex | 55 + src/d.tex | 353 ++ src/extensions.tex | 381 ++ src/f.tex | 742 +++ src/figs/PLIC-block-diagram.pdf | Bin 0 -> 19588 bytes src/figs/PLIC-interrupt-flow.pdf | Bin 0 -> 18106 bytes src/figs/halimps.pdf | Bin 0 -> 14439 bytes src/figs/halmode.pdf | Bin 0 -> 15183 bytes src/figs/privimps.pdf | Bin 0 -> 13446 bytes src/figs/virtimps.pdf | Bin 0 -> 15538 bytes src/gmaps.tex | 75 + src/graffles/PLIC-block-diagram.graffle | Bin 0 -> 11052 bytes src/graffles/PLIC-interrupt-flow.graffle | Bin 0 -> 3578 bytes src/graffles/privimps.graffle | 8179 ++++++++++++++++++++++++++++++ src/history.tex | 248 + src/hypervisor.tex | 11 + src/instr-table.tex | 1958 +++++++ src/intro.tex | 502 ++ src/l.tex | 17 + src/m.tex | 139 + src/machine.tex | 2312 +++++++++ src/naming.tex | 143 + src/opcode-map.tex | 22 + src/p.tex | 92 + src/plic.tex | 427 ++ src/preamble.tex | 121 + src/preface.tex | 154 + src/priv-csrs.tex | 421 ++ src/priv-history.tex | 34 + src/priv-insns.tex | 6 + src/priv-instr-table.tex | 113 + src/priv-intro.tex | 261 + src/priv-preface.tex | 20 + src/q.tex | 305 ++ src/riscv-privileged.tex | 67 + src/riscv-spec.bib | 468 ++ src/riscv-spec.tex | 72 + src/rv128.tex | 64 + src/rv32.tex | 1359 +++++ src/rv32e.tex | 84 + src/rv64.tex | 253 + src/rvc-instr-table.tex | 537 ++ src/rvc-opcode-map.tex | 27 + src/sbi.tex | 77 + src/supervisor.tex | 1148 +++++ src/t.tex | 16 + src/v.tex | 749 +++ 53 files changed, 24046 insertions(+) create mode 100644 src/a.tex create mode 100644 src/assembly.tex create mode 100644 src/b.tex create mode 100644 src/bbding.sty create mode 100644 src/c.tex create mode 100644 src/calling.tex create mode 100644 src/cfgstr.tex create mode 
100644 src/d.tex create mode 100644 src/extensions.tex create mode 100644 src/f.tex create mode 100644 src/figs/PLIC-block-diagram.pdf create mode 100644 src/figs/PLIC-interrupt-flow.pdf create mode 100644 src/figs/halimps.pdf create mode 100644 src/figs/halmode.pdf create mode 100644 src/figs/privimps.pdf create mode 100644 src/figs/virtimps.pdf create mode 100644 src/gmaps.tex create mode 100644 src/graffles/PLIC-block-diagram.graffle create mode 100644 src/graffles/PLIC-interrupt-flow.graffle create mode 100644 src/graffles/privimps.graffle create mode 100644 src/history.tex create mode 100644 src/hypervisor.tex create mode 100644 src/instr-table.tex create mode 100644 src/intro.tex create mode 100644 src/l.tex create mode 100644 src/m.tex create mode 100644 src/machine.tex create mode 100644 src/naming.tex create mode 100644 src/opcode-map.tex create mode 100644 src/p.tex create mode 100644 src/plic.tex create mode 100644 src/preamble.tex create mode 100644 src/preface.tex create mode 100644 src/priv-csrs.tex create mode 100644 src/priv-history.tex create mode 100644 src/priv-insns.tex create mode 100644 src/priv-instr-table.tex create mode 100644 src/priv-intro.tex create mode 100644 src/priv-preface.tex create mode 100644 src/q.tex create mode 100644 src/riscv-privileged.tex create mode 100644 src/riscv-spec.bib create mode 100644 src/riscv-spec.tex create mode 100644 src/rv128.tex create mode 100644 src/rv32.tex create mode 100644 src/rv32e.tex create mode 100644 src/rv64.tex create mode 100644 src/rvc-instr-table.tex create mode 100644 src/rvc-opcode-map.tex create mode 100644 src/sbi.tex create mode 100644 src/supervisor.tex create mode 100644 src/t.tex create mode 100644 src/v.tex (limited to 'src') diff --git a/src/a.tex b/src/a.tex new file mode 100644 index 0000000..8c3c745 --- /dev/null +++ b/src/a.tex @@ -0,0 +1,379 @@ +\chapter{``A'' Standard Extension for Atomic Instructions, Version 2.0} +\label{atomics} + +The standard atomic instruction 
extension is denoted by instruction +subset name ``A'', and contains instructions that atomically +read-modify-write memory to support synchronization between multiple +RISC-V threads running in the same memory space. The two forms of +atomic instruction provided are load-reserved/store-conditional +instructions and atomic fetch-and-op memory instructions. Both types +of atomic instruction support various memory consistency orderings +including unordered, acquire, release, and sequentially consistent +semantics. These instructions allow RISC-V to support the RCsc memory +consistency model~\cite{Gharachorloo90memoryconsistency}. + +\begin{commentary} +After much debate, the language community and architecture community +appear to have finally settled on release consistency as the standard +memory consistency model and so the RISC-V atomic support is built +around this model. +\end{commentary} + +\section{Specifying Ordering of Atomic Instructions} + +The base RISC-V ISA has a relaxed memory model, with the FENCE +instruction used to impose additional ordering constraints. The +address space is divided by the execution environment into memory and +I/O domains, and the FENCE instruction provides options to order +accesses to one or both of these two address domains. + +To provide more efficient support for release +consistency~\cite{Gharachorloo90memoryconsistency}, each atomic +instruction has two bits, {\em aq} and {\em rl}, used to specify +additional memory ordering constraints as viewed by other RISC-V +threads. The bits order accesses to one of the two address domains, +memory or I/O, depending on which address domain the atomic +instruction is accessing. No ordering constraint is implied to +accesses to the other domain, and a FENCE instruction should be used +to order across both domains. + +If both bits are clear, no additional ordering constraints are imposed +on the atomic memory operation. 
If only the {\em aq} bit is set, the +atomic memory operation is treated as an {\em acquire} access, i.e., +no following memory operations on this RISC-V thread can be observed +to take place before the acquire memory operation. If only the {\em + rl} bit is set, the atomic memory operation is treated as a {\em + release} access, i.e., the release memory operation cannot be +observed to take place before any earlier memory operations on this +RISC-V thread. If both the {\em aq} and {\em rl} bits are set, the +atomic memory operation is {\em sequentially consistent} and cannot be +observed to happen before any earlier memory operations or after any +later memory operations in the same RISC-V thread, and can only be +observed by any other thread in the same global order of all +sequentially consistent atomic memory operations to the same address +domain. + +\begin{commentary} +Theoretically, the definition of the {\em aq} and {\em rl} bits allows +for implementations without global store atomicity. When both {\em + aq} and {\em rl} bits are set, however, we require full sequential +consistency for the atomic operation, which implies global store +atomicity in addition to both acquire and release semantics. In +practice, hardware systems are usually implemented with global store +atomicity, embodied in local processor ordering rules together with +single-writer cache coherence protocols. 
+\end{commentary} + +\section{Load-Reserved/Store-Conditional Instructions} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}W@{}W@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbit{26} & +\instbit{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 1 & 1 & 5 & 5 & 3 & 5 & 7 \\ +LR & \multicolumn{2}{c}{ordering} & 0 & addr & width & dest & AMO \\ +SC & \multicolumn{2}{c}{ordering} & src & addr & width & dest & AMO \\ +\end{tabular} +\end{center} + +Complex atomic memory operations on a single memory word are performed +with the load-reserved (LR) and store-conditional (SC) instructions. +LR loads a word from the address in {\em rs1}, places the +sign-extended value in {\em rd}, and registers a reservation on the +memory address. SC writes a word in {\em rs2} to the address in {\em + rs1}, provided a valid reservation still exists on that address. SC +writes zero to {\em rd} on success or a nonzero code on failure. + +\begin{commentary} +Both compare-and-swap (CAS) and LR/SC can be used to build lock-free +data structures. 
After extensive discussion, we opted for LR/SC for +several reasons: 1) CAS suffers from the ABA problem, which LR/SC +avoids because it monitors all accesses to the address rather than +only checking for changes in the data value; 2) CAS would also require +a new integer instruction format to support three source operands +(address, compare value, swap value) as well as a different memory +system message format, which would complicate microarchitectures; 3) +Furthermore, to avoid the ABA problem, other systems provide a +double-wide CAS (DW-CAS) to allow a counter to be tested and +incremented along with a data word. This requires reading five +registers and writing two in one instruction, and also a new larger +memory system message type, further complicating implementations; 4) +LR/SC provides a more efficient implementation of many primitives as +it only requires one load as opposed to two with CAS (one load before +the CAS instruction to obtain a value for speculative computation, +then a second load as part of the CAS instruction to check if the value is +unchanged before updating). + +The main disadvantage of LR/SC over CAS is livelock, which we avoid +with an architected guarantee of eventual forward progress as +described below. Another concern is whether the influence of the +current x86 architecture, with its DW-CAS, will complicate porting of +synchronization libraries and other software that assumes DW-CAS is +the basic machine primitive. A possible mitigating factor is the +recent addition of transactional memory instructions to x86, which +might cause a move away from DW-CAS. +\end{commentary} + +The failure code with value 1 is reserved to encode an unspecified +failure. Other failure codes are reserved at this time, and portable +software should only assume the failure code will be nonzero. LR and +SC operate on naturally-aligned 64-bit (RV64 only) or 32-bit words in +memory. Misaligned addresses will generate misaligned address +exceptions. 
+ +\begin{commentary} +We reserve a failure code of 1 to mean ``unspecified'' so that simple +implementations may return this value using the existing mux required +for the SLT/SLTU instructions. More specific failure codes might be +defined in future versions or extensions to the ISA. +\end{commentary} + +\label{lrscseq} + +In the standard A extension, certain constrained LR/SC sequences are +guaranteed to succeed eventually. The static code for the LR/SC +sequence plus the code to retry the sequence in case of failure must +comprise at most 16 integer instructions placed sequentially in +memory. For the sequence to be guaranteed to eventually succeed, the +dynamic code executed between the LR and SC instructions can only +contain other instructions from the base ``I'' subset, excluding +loads, stores, backward jumps or taken backward branches, FENCE, +FENCE.I, and SYSTEM instructions. The code to retry a failing LR/SC +sequence can contain backward jumps and/or branches to repeat the +LR/SC sequence, but otherwise has the same constraints. The SC must +be to the same address as the latest LR executed. LR/SC sequences +that do not meet these constraints might complete on some attempts on +some implementations, but there is no guarantee of eventual success. + +\begin{commentary} +One advantage of CAS is that it guarantees that some thread eventually +makes progress, whereas an LR/SC atomic sequence could livelock +indefinitely on some systems. To avoid this concern, we added an +architectural guarantee of forward progress to LR/SC atomic sequences. +The restrictions on LR/SC sequence contents allow an implementation +to capture a cache line on the LR and complete the LR/SC sequence by +holding off remote cache interventions for a bounded short +time. Interrupts and TLB misses might cause the reservation to be +lost, but eventually the atomic sequence can complete. 
We restricted +the length of LR/SC sequences to fit within 64 contiguous instruction +bytes in the base ISA to avoid undue restrictions on instruction cache +and TLB size and associativity. Similarly, we disallowed other loads +and stores within the sequences to avoid restrictions on data cache +associativity. The restrictions on branches and jumps limit the time +that can be spent in the sequence. Floating-point operations and +integer multiply/divide were disallowed to simplify the operating +system's emulation of these instructions on implementations lacking +appropriate hardware support. +\end{commentary} + +An implementation can reserve an arbitrary subset of the memory space +on each LR and multiple LR reservations might be active simultaneously +for a single hart. An SC can succeed if no accesses from other harts +to the address can be observed to have occurred between the SC and +the last LR in this hart to reserve the address. Note this LR might +have had a different address argument, but reserved the SC's address +as part of the memory subset. Following this model, in systems with +memory translation, an SC is allowed to succeed if the earlier LR +reserved the same location using an alias with a different virtual +address, but is also allowed to fail if the virtual address is +different. The SC must fail if there is an observable memory access +from another hart to the address, or if there is an intervening +context switch on this hart, or if in the meantime the hart executed a +privileged exception-return instruction. + +\begin{commentary} +The specification explicitly allows implementations to support more +powerful implementations with wider guarantees, provided they do not +void the atomicity guarantees for the constrained sequences. +\end{commentary} + +LR/SC can be used to construct lock-free data structures. An example +using LR/SC to implement a compare-and-swap function is shown in +Figure~\ref{cas}. 
If inlined, compare-and-swap functionality need +only take three instructions. + +\begin{figure}[h!] +\begin{center} +\begin{verbatim} + # a0 holds address of memory location + # a1 holds expected value + # a2 holds desired value + # a0 holds return value, 0 if successful, !0 otherwise + cas: + lr.w t0, (a0) # Load original value. + bne t0, a1, fail # Doesn't match, so fail. + sc.w a0, a2, (a0) # Try to update. + jr ra # Return. + fail: + li a0, 1 # Set return to failure. + jr ra # Return. +\end{verbatim} +\end{center} +\caption{Sample code for compare-and-swap function using LR/SC.} +\label{cas} +\end{figure} + +An SC instruction can never be observed by another RISC-V thread +before the immediately preceding LR. Due to the atomic nature of the +LR/SC sequence, no memory operations from any thread can be observed +to have occurred between the LR and a successful SC. The LR/SC +sequence can be given acquire semantics by setting the {\em aq} bit on +the SC instruction. The LR/SC sequence can be given release semantics +by setting the {\em rl} bit on the LR instruction. Setting both {\em + aq} and {\em rl} bits on the LR instruction, and setting the {\em + aq} bit on the SC instruction makes the LR/SC sequence sequentially +consistent with respect to other sequentially consistent atomic +operations. + +If neither bit is set on both LR and SC, the LR/SC sequence can be +observed to occur before or after surrounding memory operations from +the same RISC-V thread. This can be appropriate when the LR/SC +sequence is used to implement a parallel reduction operation. + +\begin{commentary} +In general, a multi-word atomic primitive is desirable but there is +still considerable debate about what form this should take, and +guaranteeing forward progress adds complexity to a system. Our +current thoughts are to include a small limited-capacity transactional +memory buffer along the lines of the original transactional memory +proposals as an optional standard extension ``T''. 
+\end{commentary} + +\section{Atomic Memory Operations} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{O@{}W@{}W@{}R@{}R@{}F@{}R@{}R} +\\ +\instbitrange{31}{27} & +\instbit{26} & +\instbit{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 1 & 1 & 5 & 5 & 3 & 5 & 7 \\ +AMOSWAP.W/D & \multicolumn{2}{c}{ordering} & src & addr & width & dest & AMO \\ +AMOADD.W/D & \multicolumn{2}{c}{ordering} & src & addr & width & dest & AMO \\ +AMOAND.W/D & \multicolumn{2}{c}{ordering} & src & addr & width & dest & AMO \\ +AMOOR.W/D & \multicolumn{2}{c}{ordering} & src & addr & width & dest & AMO \\ +AMOXOR.W/D & \multicolumn{2}{c}{ordering} & src & addr & width & dest & AMO \\ +AMOMAX[U].W/D & \multicolumn{2}{c}{ordering} & src & addr & width & dest & AMO \\ +AMOMIN[U].W/D & \multicolumn{2}{c}{ordering} & src & addr & width & dest & AMO \\ +\end{tabular} +\end{center} + +\vspace{-0.1in} The atomic memory operation (AMO) instructions perform +read-modify-write operations for multiprocessor synchronization and +are encoded with an R-type instruction format. These AMO instructions +atomically load a data value from the address in {\em rs1}, place the +value into register {\em rd}, apply a binary operator to the loaded +value and the original value in {\em rs2}, then store the result back +to the address in {\em rs1}. AMOs can either operate on 64-bit (RV64 +only) or 32-bit words in memory. For RV64, 32-bit AMOs always +sign-extend the value placed in {\em rd}. The address held in {\em + rs1} must be naturally aligned to the size of the operand (i.e., +eight-byte aligned for 64-bit words and four-byte aligned for 32-bit +words). 
If the address is not naturally aligned, a misaligned address +exception will be generated. + +The operations supported are swap, integer add, logical AND, logical +OR, logical XOR, and signed and unsigned integer maximum and minimum. +Without ordering constraints, these AMOs can be used to implement +parallel reduction operations, where typically the return value would +be discarded by writing to {\tt x0}. + +\begin{commentary} +We provided fetch-and-op style atomic primitives as they scale to +highly parallel systems better than LR/SC or CAS. A simple +microarchitecture can implement AMOs using the LR/SC primitives. More +complex implementations might also implement AMOs at memory +controllers, and can optimize away fetching the original value when +the destination is {\tt x0}. +\end{commentary} + +To help implement multiprocessor synchronization, the AMOs optionally +provide release consistency semantics. If the {\em aq} bit is set, +then no later memory operations in this RISC-V thread can be observed +to take place before the AMO. +Conversely, if the {\em rl} bit is set, then other +RISC-V threads will not observe the AMO before memory accesses +preceding the AMO in this RISC-V thread. + +\begin{commentary} +The AMOs were designed to implement the C11 and C++11 memory models +efficiently. Although the FENCE R, RW instruction suffices to +implement the {\em acquire} operation and FENCE RW, W suffices to +implement {\em release}, both imply additional unnecessary ordering as +compared to AMOs with the corresponding {\em aq} or {\em rl} bit set. +\end{commentary} + +AMOs can also be used to provide sequentially consistent loads and +stores. A sequentially consistent load can be implemented as an +AMOADD of x0 with both {\em aq} and {\em rl} set. A sequentially +consistent store can be implemented as an AMOSWAP that writes the old +value to x0 and has both {\em aq} and {\em rl} set. 
+ +An example code sequence for a critical section guarded by a +test-and-set spinlock is shown in Figure~\ref{critical}. Note the +first AMO is marked {\em aq} to order the lock acquisition before the +critical section, and the second AMO is marked {\em rl} to order +the critical section before the lock relinquishment. + +\begin{figure}[h!] +\begin{center} +\begin{verbatim} + li t0, 1 # Initialize swap value. + again: + amoswap.w.aq t0, t0, (a0) # Attempt to acquire lock. + bnez t0, again # Retry if held. + # ... + # Critical section. + # ... + amoswap.w.rl x0, x0, (a0) # Release lock by storing 0. +\end{verbatim} +\end{center} +\caption{Sample code for mutual exclusion. {\tt a0} contains the address of the lock.} +\label{critical} +\end{figure} + +\begin{commentary} +We recommend the use of the AMO Swap idiom shown above for both lock +acquire and release to simplify the implementation of speculative lock +elision~\cite{Rajwar:2001:SLE}. + +At the risk of complicating the implementation of atomic operations, +microarchitectures can elide the store within the acquire swap if the +lock value matches the swap value, to avoid dirtying a cache line held +in a shared or exclusive clean state. The effect is similar to a +test-and-test-and-set lock but with shorter code paths. +\end{commentary} diff --git a/src/assembly.tex b/src/assembly.tex new file mode 100644 index 0000000..bd34d40 --- /dev/null +++ b/src/assembly.tex @@ -0,0 +1,118 @@ +\chapter{RISC-V Assembly Programmer's Handbook} +\label{assembly} + +This chapter is a placeholder for an assembly programmer's manual. + +Tables~\ref{pseudos} and \ref{csr-pseudos} contain a listing of standard +RISC-V pseudoinstructions. 
+ +\begin{table}[h] +\begin{small} +\begin{center} +\begin{tabular}{l l l} +Pseudoinstruction & Base Instruction(s) & Meaning \\ \hline + +\multirow{2}{*}{\tt la rd, symbol} & {\tt auipc rd, symbol[31:12]} & \multirow{2}{*}{Load address} \\ + & {\tt addi rd, rd, symbol[11:0]} \\ +\multirow{2}{*}{\tt l\{b|h|w|d\} rd, symbol} & {\tt auipc rd, symbol[31:12]} & \multirow{2}{*}{Load global} \\ + & {\tt l\{b|h|w|d\} rd, symbol[11:0](rd)} \\ +\multirow{2}{*}{\tt s\{b|h|w|d\} rd, symbol, rt} & {\tt auipc rt, symbol[31:12]} & \multirow{2}{*}{Store global} \\ + & {\tt s\{b|h|w|d\} rd, symbol[11:0](rt)} \\ +\multirow{2}{*}{\tt fl\{w|d\} rd, symbol, rt} & {\tt auipc rt, symbol[31:12]} & \multirow{2}{*}{Floating-point load global} \\ + & {\tt fl\{w|d\} rd, symbol[11:0](rt)} \\ +\multirow{2}{*}{\tt fs\{w|d\} rd, symbol, rt} & {\tt auipc rt, symbol[31:12]} & \multirow{2}{*}{Floating-point store global} \\ + & {\tt fs\{w|d\} rd, symbol[11:0](rt)} \\ +\hline +{\tt nop} & {\tt addi x0, x0, 0} & No operation \\ +{\tt li rd, immediate} & {\em Myriad sequences} & Load immediate \\ +{\tt mv rd, rs} & {\tt addi rd, rs, 0} & Copy register \\ +{\tt not rd, rs} & {\tt xori rd, rs, -1} & One's complement \\ +{\tt neg rd, rs} & {\tt sub rd, x0, rs} & Two's complement \\ +{\tt negw rd, rs} & {\tt subw rd, x0, rs} & Two's complement word \\ +{\tt sext.w rd, rs} & {\tt addiw rd, rs, 0} & Sign extend word \\ +{\tt seqz rd, rs} & {\tt sltiu rd, rs, 1} & Set if $=$ zero \\ +{\tt snez rd, rs} & {\tt sltu rd, x0, rs} & Set if $\neq$ zero \\ +{\tt sltz rd, rs} & {\tt slt rd, rs, x0} & Set if $<$ zero \\ +{\tt sgtz rd, rs} & {\tt slt rd, x0, rs} & Set if $>$ zero \\ +\hline +{\tt fmv.s rd, rs} & {\tt fsgnj.s rd, rs, rs} & Copy single-precision register \\ +{\tt fabs.s rd, rs} & {\tt fsgnjx.s rd, rs, rs} & Single-precision absolute value \\ +{\tt fneg.s rd, rs} & {\tt fsgnjn.s rd, rs, rs} & Single-precision negate \\ +{\tt fmv.d rd, rs} & {\tt fsgnj.d rd, rs, rs} & Copy double-precision register \\ 
+{\tt fabs.d rd, rs} & {\tt fsgnjx.d rd, rs, rs} & Double-precision absolute value \\ +{\tt fneg.d rd, rs} & {\tt fsgnjn.d rd, rs, rs} & Double-precision negate \\ +\hline +{\tt beqz rs, offset} & {\tt beq rs, x0, offset} & Branch if $=$ zero \\ +{\tt bnez rs, offset} & {\tt bne rs, x0, offset} & Branch if $\neq$ zero \\ +{\tt blez rs, offset} & {\tt bge x0, rs, offset} & Branch if $\leq$ zero \\ +{\tt bgez rs, offset} & {\tt bge rs, x0, offset} & Branch if $\geq$ zero \\ +{\tt bltz rs, offset} & {\tt blt rs, x0, offset} & Branch if $<$ zero \\ +{\tt bgtz rs, offset} & {\tt blt x0, rs, offset} & Branch if $>$ zero \\ +\hline +{\tt bgt rs, rt, offset} & {\tt blt rt, rs, offset} & Branch if $>$ \\ +{\tt ble rs, rt, offset} & {\tt bge rt, rs, offset} & Branch if $\leq$ \\ +{\tt bgtu rs, rt, offset} & {\tt bltu rt, rs, offset} & Branch if $>$, unsigned \\ +{\tt bleu rs, rt, offset} & {\tt bgeu rt, rs, offset} & Branch if $\leq$, unsigned \\ +\hline +{\tt j offset} & {\tt jal x0, offset} & Jump \\ +{\tt jal offset} & {\tt jal x1, offset} & Jump and link \\ +{\tt jr rs} & {\tt jalr x0, rs, 0} & Jump register \\ +{\tt jalr rs} & {\tt jalr x1, rs, 0} & Jump and link register \\ +{\tt ret} & {\tt jalr x0, x1, 0} & Return from subroutine \\ +\multirow{2}{*}{\tt call offset} & {\tt auipc x6, offset[31:12]} & \multirow{2}{*}{Call far-away subroutine} \\ + & {\tt jalr x1, x6, offset[11:0]} \\ +\multirow{2}{*}{\tt tail offset} & {\tt auipc x6, offset[31:12]} & \multirow{2}{*}{Tail call far-away subroutine} \\ + & {\tt jalr x0, x6, offset[11:0]} & \\ +\hline +{\tt fence} & {\tt fence iorw, iorw} & Fence on all memory and I/O \\ +\hline + +\end{tabular} +\end{center} +\end{small} +\caption{RISC-V pseudoinstructions.} +\label{pseudos} +\end{table} + +\begin{table}[h] +\begin{small} +\begin{center} +\begin{tabular}{l l l} +Pseudoinstruction & Base Instruction & Meaning \\ \hline + +{\tt rdinstret[h] rd} & {\tt csrrs rd, instret[h], x0} & Read instructions-retired counter \\ +{\tt 
rdcycle[h] rd} & {\tt csrrs rd, cycle[h], x0} & Read cycle counter \\ +{\tt rdtime[h] rd} & {\tt csrrs rd, time[h], x0} & Read real-time clock \\ +\hline +{\tt csrr rd, csr} & {\tt csrrs rd, csr, x0} & Read CSR \\ +{\tt csrw csr, rs} & {\tt csrrw x0, csr, rs} & Write CSR \\ +{\tt csrs csr, rs} & {\tt csrrs x0, csr, rs} & Set bits in CSR \\ +{\tt csrc csr, rs} & {\tt csrrc x0, csr, rs} & Clear bits in CSR \\ +\hline +{\tt csrwi csr, imm} & {\tt csrrwi x0, csr, imm} & Write CSR, immediate \\ +{\tt csrsi csr, imm} & {\tt csrrsi x0, csr, imm} & Set bits in CSR, immediate \\ +{\tt csrci csr, imm} & {\tt csrrci x0, csr, imm} & Clear bits in CSR, immediate \\ +\hline +{\tt frcsr rd} & {\tt csrrs rd, fcsr, x0} & Read FP control/status register \\ +{\tt fscsr rd, rs} & {\tt csrrw rd, fcsr, rs} & Swap FP control/status register \\ +{\tt fscsr rs} & {\tt csrrw x0, fcsr, rs} & Write FP control/status register \\ +\hline +{\tt frrm rd} & {\tt csrrs rd, frm, x0} & Read FP rounding mode \\ +{\tt fsrm rd, rs} & {\tt csrrw rd, frm, rs} & Swap FP rounding mode \\ +{\tt fsrm rs} & {\tt csrrw x0, frm, rs} & Write FP rounding mode \\ +{\tt fsrmi rd, imm} & {\tt csrrwi rd, frm, imm} & Swap FP rounding mode, immediate \\ +{\tt fsrmi imm} & {\tt csrrwi x0, frm, imm} & Write FP rounding mode, immediate \\ +\hline +{\tt frflags rd} & {\tt csrrs rd, fflags, x0} & Read FP exception flags \\ +{\tt fsflags rd, rs} & {\tt csrrw rd, fflags, rs} & Swap FP exception flags \\ +{\tt fsflags rs} & {\tt csrrw x0, fflags, rs} & Write FP exception flags \\ +{\tt fsflagsi rd, imm} & {\tt csrrwi rd, fflags, imm} & Swap FP exception flags, immediate \\ +{\tt fsflagsi imm} & {\tt csrrwi x0, fflags, imm} & Write FP exception flags, immediate \\ +\hline + +\end{tabular} +\end{center} +\end{small} +\caption{Pseudoinstructions for accessing control and status registers.} +\label{csr-pseudos} +\end{table} diff --git a/src/b.tex b/src/b.tex new file mode 100644 index 0000000..0951df4 --- /dev/null +++ b/src/b.tex 
@@ -0,0 +1,19 @@ +\chapter{``B'' Standard Extension for Bit Manipulation, Version 0.0} +\label{sec:bits} + +This chapter is a placeholder for a future standard extension to +provide bit manipulation instructions, including instructions to +insert, extract, and test bit fields, and for rotations, funnel +shifts, and bit and byte permutations. + +\begin{commentary} +Although bit manipulation instructions are very effective in some +application domains, particularly when dealing with externally packed +data structures, we excluded them from the base ISA as they are not +useful in all domains and can add additional complexity or instruction +formats to supply all needed operands. + +We anticipate the B extension will be a brownfield encoding within the +base 30-bit instruction space. +\end{commentary} + diff --git a/src/bbding.sty b/src/bbding.sty new file mode 100644 index 0000000..7e49f44 --- /dev/null +++ b/src/bbding.sty @@ -0,0 +1,158 @@ +%% +%% This is file `bbding.sty', +%% generated with the docstrip utility. +%% +%% The original source files were: +%% +%% bbding.dtx (with options: `sty') +%% +%% IMPORTANT NOTICE: +%% +%% For the copyright see the source file. +%% +%% Any modified versions of this file must be renamed +%% with new filenames distinct from bbding.sty. +%% +%% For distribution of the original source see the terms +%% for copying and modification in the file bbding.dtx. +%% +%% This generated file may be distributed as long as the +%% original source files, as listed above, are part of the +%% same distribution. (The sources need not necessarily be +%% in the same archive or directory.) 
+ + +\NeedsTeXFormat{LaTeX2e} +\ProvidesPackage{bbding}% + [1999/04/15 v1.01 Dingbats symbols% + ] +\newcommand{\dingfamily}{\fontencoding{U}\fontfamily{ding}\selectfont} +\newcommand{\@chooseSymbol}[1]{{\dingfamily\symbol{#1}}} +\newcommand{\ScissorRightBrokenBottom}{\@chooseSymbol{'000}} +\newcommand{\ScissorRight}{\@chooseSymbol{'001}} +\newcommand{\ScissorRightBrokenTop}{\@chooseSymbol{'002}} +\newcommand{\ScissorLeftBrokenBottom}{\@chooseSymbol{'003}} +\newcommand{\ScissorLeft}{\@chooseSymbol{'004}} +\newcommand{\ScissorLeftBrokenTop}{\@chooseSymbol{'005}} +\newcommand{\ScissorHollowRight}{\@chooseSymbol{'006}} +\newcommand{\ScissorHollowLeft}{\@chooseSymbol{'007}} +\newcommand{\Phone}{\@chooseSymbol{'010}} +\newcommand{\PhoneHandset}{\@chooseSymbol{'011}} +\newcommand{\Tape}{\@chooseSymbol{'012}} +\newcommand{\Plane}{\@chooseSymbol{'013}} +\newcommand{\Envelope}{\@chooseSymbol{'014}} +\newcommand{\HandCuffRight}{\@chooseSymbol{'015}} +\newcommand{\HandCuffLeft}{\@chooseSymbol{'016}} +\newcommand{\HandCuffRightUp}{\@chooseSymbol{'017}} +\newcommand{\HandCuffLeftUp}{\@chooseSymbol{'020}} +\newcommand{\HandRight}{\@chooseSymbol{'021}} +\newcommand{\HandLeft}{\@chooseSymbol{'022}} +\newcommand{\HandRightUp}{\@chooseSymbol{'023}} +\newcommand{\HandLeftUp}{\@chooseSymbol{'024}} +\newcommand{\Peace}{\@chooseSymbol{'025}} +\newcommand{\HandPencilLeft}{\@chooseSymbol{'026}} +\newcommand{\PencilRight}{\@chooseSymbol{'027}} +\newcommand{\PencilLeft}{\@chooseSymbol{'030}} +\newcommand{\PencilRightUp}{\@chooseSymbol{'031}} +\newcommand{\PencilLeftUp}{\@chooseSymbol{'032}} +\newcommand{\PencilRightDown}{\@chooseSymbol{'033}} +\newcommand{\PencilLeftDown}{\@chooseSymbol{'034}} +\newcommand{\NibRight}{\@chooseSymbol{'035}} +\newcommand{\NibLeft}{\@chooseSymbol{'036}} +\newcommand{\NibSolidRight}{\@chooseSymbol{'037}} +\newcommand{\NibSolidLeft}{\@chooseSymbol{'040}} +\newcommand{\Checkmark}{\@chooseSymbol{'041}} +\newcommand{\CheckmarkBold}{\@chooseSymbol{'042}} 
+\newcommand{\XSolid}{\@chooseSymbol{'043}} +\newcommand{\XSolidBold}{\@chooseSymbol{'044}} +\newcommand{\XSolidBrush}{\@chooseSymbol{'045}} +\newcommand{\PlusOutline}{\@chooseSymbol{'046}} +\newcommand{\Plus}{\@chooseSymbol{'047}} +\newcommand{\PlusCenterOpen}{\@chooseSymbol{'050}} +\newcommand{\PlusThinCenterOpen}{\@chooseSymbol{'051}} +\newcommand{\Cross}{\@chooseSymbol{'052}} +\newcommand{\CrossOpenShadow}{\@chooseSymbol{'053}} +\newcommand{\CrossOutline}{\@chooseSymbol{'054}} +\newcommand{\CrossBoldOutline}{\@chooseSymbol{'055}} +\newcommand{\CrossMaltese}{\@chooseSymbol{'056}} +\newcommand{\DavidStarSolid}{\@chooseSymbol{'057}} +\newcommand{\DavidStar}{\@chooseSymbol{'060}} +\newcommand{\FourAsterisk}{\@chooseSymbol{'061}} +\newcommand{\JackStar}{\@chooseSymbol{'062}} +\newcommand{\JackStarBold}{\@chooseSymbol{'063}} +\newcommand{\CrossClowerTips}{\@chooseSymbol{'064}} +\newcommand{\FourStar}{\@chooseSymbol{'065}} +\newcommand{\FourStarOpen}{\@chooseSymbol{'066}} +\newcommand{\FiveStarLines}{\@chooseSymbol{'067}} +\newcommand{\FiveStar}{\@chooseSymbol{'070}} +\newcommand{\FiveStarOpen}{\@chooseSymbol{'071}} +\newcommand{\FiveStarOpenCircled}{\@chooseSymbol{'072}} +\newcommand{\FiveStarCenterOpen}{\@chooseSymbol{'073}} +\newcommand{\FiveStarOpenDotted}{\@chooseSymbol{'074}} +\newcommand{\FiveStarOutline}{\@chooseSymbol{'075}} +\newcommand{\FiveStarOutlineHeavy}{\@chooseSymbol{'076}} +\newcommand{\FiveStarConvex}{\@chooseSymbol{'077}} +\newcommand{\FiveStarShadow}{\@chooseSymbol{'100}} +\newcommand{\AsteriskBold}{\@chooseSymbol{'101}} +\newcommand{\AsteriskCenterOpen}{\@chooseSymbol{'102}} +\newcommand{\AsteriskThin}{\@chooseSymbol{'103}} +\newcommand{\AsteriskThinCenterOpen}{\@chooseSymbol{'104}} +\newcommand{\EightStarTaper}{\@chooseSymbol{'105}} +\newcommand{\EightStarConvex}{\@chooseSymbol{'106}} +\newcommand{\SixStar}{\@chooseSymbol{'107}} +\newcommand{\EightStar}{\@chooseSymbol{'110}} +\newcommand{\EightStarBold}{\@chooseSymbol{'111}} 
+\newcommand{\TwelweStar}{\@chooseSymbol{'112}} +\newcommand{\SixteenStarLight}{\@chooseSymbol{'113}} +\newcommand{\SixFlowerPetalRemoved}{\@chooseSymbol{'114}} +\newcommand{\SixFlowerOpenCenter}{\@chooseSymbol{'115}} +\newcommand{\Asterisk}{\@chooseSymbol{'116}} +\newcommand{\SixFlowerAlternate}{\@chooseSymbol{'117}} +\newcommand{\FiveFlowerPetal}{\@chooseSymbol{'120}} +\newcommand{\SixFlowerPetalDotted}{\@chooseSymbol{'121}} +\newcommand{\FiveFlowerOpen}{\@chooseSymbol{'122}} +\newcommand{\EightFlowerPetal}{\@chooseSymbol{'123}} +\newcommand{\SunshineOpenCircled}{\@chooseSymbol{'124}} +\newcommand{\SixFlowerAltPetal}{\@chooseSymbol{'125}} +\newcommand{\FourClowerOpen}{\@chooseSymbol{'126}} +\newcommand{\FourClowerSolid}{\@chooseSymbol{'127}} +\newcommand{\AsteriskRoundedEnds}{\@chooseSymbol{'130}} +\newcommand{\EightFlowerPetalRemoved}{\@chooseSymbol{'131}} +\newcommand{\EightAsterisk}{\@chooseSymbol{'132}} +\newcommand{\SixFlowerRemovedOpenPetal}{\@chooseSymbol{'133}} +\newcommand{\SparkleBold}{\@chooseSymbol{'134}} +\newcommand{\Sparkle}{\@chooseSymbol{'135}} +\newcommand{\SnowflakeChevron}{\@chooseSymbol{'136}} +\newcommand{\SnowflakeChevronBold}{\@chooseSymbol{'137}} +\newcommand{\Snowflake}{\@chooseSymbol{'140}} +\newcommand{\CircleSolid}{\@chooseSymbol{'141}} +\newcommand{\Ellipse}{\@chooseSymbol{'142}} +\newcommand{\EllipseSolid}{\@chooseSymbol{'143}} +\newcommand{\CircleShadow}{\@chooseSymbol{'144}} +\newcommand{\EllipseShadow}{\@chooseSymbol{'145}} +\newcommand{\Square}{\@chooseSymbol{'146}} +\newcommand{\SquareSolid}{\@chooseSymbol{'147}} +\newcommand{\SquareShadowBottomRight}{\@chooseSymbol{'150}} +\newcommand{\SquareShadowTopRight}{\@chooseSymbol{'151}} +\newcommand{\SquareShadowTopLeft}{\@chooseSymbol{'152}} +\newcommand{\SquareCastShadowBottomRight}{\@chooseSymbol{'153}} +\newcommand{\SquareCastShadowTopRight}{\@chooseSymbol{'154}} +\newcommand{\SquareCastShadowTopLeft}{\@chooseSymbol{'155}} +\newcommand{\TriangleUp}{\@chooseSymbol{'156}} 
+\newcommand{\TriangleDown}{\@chooseSymbol{'157}} +\newcommand{\DiamondSolid}{\@chooseSymbol{'160}} +\newcommand{\OrnamentDiamondSolid}{\@chooseSymbol{'161}} +\newcommand{\HalfCircleRight}{\@chooseSymbol{'162}} +\newcommand{\HalfCircleLeft}{\@chooseSymbol{'163}} +\newcommand{\RectangleThin}{\@chooseSymbol{'164}} +\newcommand{\Rectangle}{\@chooseSymbol{'165}} +\newcommand{\RectangleBold}{\@chooseSymbol{'166}} +\newcommand{\ArrowBoldRightStrobe}{\@chooseSymbol{'167}} +\newcommand{\ArrowBoldUpRight}{\@chooseSymbol{'170}} +\newcommand{\ArrowBoldDownRight}{\@chooseSymbol{'171}} +\newcommand{\ArrowBoldRightShort}{\@chooseSymbol{'172}} +\newcommand{\ArrowBoldRightCircled}{\@chooseSymbol{'173}} +\endinput +%% +%% End of file `bbding.sty'. + diff --git a/src/c.tex b/src/c.tex new file mode 100644 index 0000000..2c81f7b --- /dev/null +++ b/src/c.tex @@ -0,0 +1,1162 @@ +\chapter{``C'' Standard Extension for Compressed Instructions, Version +1.9} +\label{compressed} + +This chapter describes the current draft proposal for the RISC-V +standard compressed instruction set extension, named ``C'', which +reduces static and dynamic code size by adding short 16-bit +instruction encodings for common operations. The C extension can be +added to any of the base ISAs (RV32, RV64, RV128), and we use the +generic term ``RVC'' to cover any of these. Typically, 50\%--60\% of +the RISC-V instructions in a program can be replaced with RVC +instructions, resulting in a 25\%--30\% code-size reduction. + +We believe this draft represents the close-to-final design for RV32C +and RV64C (it seems premature to freeze RV128C), though we are +requesting one more round of comments, hence the 1.9 revision number. +Please send your comments to the {\tt isa-dev} mailing list at {\tt + isa-dev@lists.riscv.org}.
+ +\section{Overview} + +RVC uses a simple compression scheme that offers shorter 16-bit +versions of common 32-bit RISC-V instructions when: +\begin{tightlist} + \item the immediate or address offset is small, or + \item one of the registers is the zero register ({\tt x0}), the + ABI link register ({\tt x1}), or the ABI stack pointer ({\tt + x2}), or + \item the destination register and the first source register are + identical, or + \item the registers used are the 8 most popular ones. +\end{tightlist} + +The C extension is compatible with all other standard instruction +extensions. The C extension allows 16-bit instructions to be freely +intermixed with 32-bit instructions, with the latter now able to start +on any 16-bit boundary. + +\begin{commentary} +Removing the 32-bit alignment constraint on the original 32-bit +instructions allows significantly greater code density. +\end{commentary} + +The compressed instruction encodings are mostly common across RV32C, +RV64C, and RV128C, but as shown in Table~\ref{rvcopcodemap}, a few +opcodes are used for different purposes depending on base ISA width. +For example, the wider address-space RV64C and RV128C variants require +additional opcodes to compress loads and stores of 64-bit integer +values, while RV32C uses the same opcodes to compress loads and stores +of single-precision floating-point values. Similarly, RV128C requires +additional opcodes to capture loads and stores of 128-bit integer +values, while these same opcodes are used for loads and stores of +double-precision floating-point values in RV32C and RV64C. If the C +extension is implemented, the appropriate compressed floating-point +load and store instructions must be provided whenever the relevant +standard floating-point extension (F and/or D) is also implemented. +In addition, RV32C includes a compressed jump and link instruction to +compress short-range subroutine calls, where the same opcode is used +to compress ADDIW for RV64C and RV128C. 
+ +\begin{commentary} +Double-precision loads and stores are a significant fraction of static +and dynamic instructions, hence the motivation to include them in the +RV32C and RV64C encoding. + +Although single-precision loads and stores are not a significant +source of static or dynamic compression for benchmarks compiled for +the currently supported ABIs, for microcontrollers that only provide +hardware single-precision floating-point units and have an ABI that +only supports single-precision floating-point numbers, the +single-precision loads and stores will be used at least as frequently +as double-precision loads and stores in the measured benchmarks. +Hence, the motivation to provide compressed support for these in +RV32C. + +Short-range subroutine calls are more likely in small binaries for +microcontrollers, hence the motivation to include these in RV32C. + +Although reusing opcodes for different purposes for different base +register widths adds some complexity to documentation, the impact on +implementation complexity is small even for designs that support +multiple base ISA register widths. The compressed floating-point load +and store variants use the same instruction format with the same +register specifiers as the wider integer loads and stores. +\end{commentary} + +RVC was designed under the constraint that each RVC instruction +expands into a single 32-bit instruction in either the base ISA +(RV32I/E, RV64I, or RV128I) or the F and D standard extensions where +present. Adopting this constraint has two main benefits: + +\begin{tightlist} +\item Hardware designs can simply expand RVC instructions during + decode, simplifying verification and minimizing modifications to + existing microarchitectures. +\item Compilers can be unaware of the RVC extension and leave code + compression to the assembler and linker, although a + compression-aware compiler will generally be able to produce better + results. 
+\end{tightlist} + +\begin{commentary} +We felt the multiple complexity reductions of a simple one-one mapping +between C and base IFD instructions far outweighed the potential gains +of a slightly denser encoding that added additional instructions only +supported in the C extension, or that allowed encoding of multiple IFD +instructions in one C instruction. +\end{commentary} + +It is important to note that the C extension is not designed to be a +stand-alone ISA, and is meant to be used alongside a base ISA. + +\begin{commentary} +Variable-length instruction sets have long been used to improve code +density. For example, the IBM Stretch~\cite{stretch}, developed in +the late 1950s, had an ISA with 32-bit and 64-bit instructions, where +some of the 32-bit instructions were compressed versions of the full +64-bit instructions. Stretch also employed the concept of limiting +the set of registers that were addressable in some of the shorter +instruction formats, with short branch instructions that could only +refer to one of the index registers. The later IBM 360 +architecture~\cite{ibm360} supported a simple variable-length +instruction encoding with 16-bit, 32-bit, or 48-bit instruction +formats. + +In 1963, CDC introduced the Cray-designed CDC 6600~\cite{cdc6600}, a +precursor to RISC architectures, that introduced a register-rich +load-store architecture with instructions of two lengths, 15-bits and +30-bits. The later Cray-1 design used a very similar instruction +format, with 16-bit and 32-bit instruction lengths. + +The initial RISC ISAs from the 1980s all picked performance over code +size, which was reasonable for a workstation environment, but not for +embedded systems. Hence, both ARM and MIPS subsequently made versions +of the ISAs that offered smaller code size by offering an alternative +16-bit wide instruction set instead of the standard 32-bit wide +instructions. 
The compressed RISC ISAs reduced code size relative to +their starting points by about 25--30\%, yielding code that was +significantly \emph{smaller} than 80x86. This result surprised some, +as their intuition was that the variable-length CISC ISA should be +smaller than RISC ISAs that offered only 16-bit and 32-bit formats. + +Since the original RISC ISAs did not leave sufficient opcode space +free to include these unplanned compressed instructions, they were +instead developed as complete new ISAs. This meant compilers needed +different code generators for the separate compressed ISAs. The first +compressed RISC ISA extensions (e.g., ARM Thumb and MIPS16) used only +a fixed 16-bit instruction size, which gave good reductions in static +code size but caused an increase in dynamic instruction count, which +led to lower performance compared to the original fixed-width 32-bit +instruction size. This led to the development of a second generation +of compressed RISC ISA designs with mixed 16-bit and 32-bit +instruction lengths (e.g., ARM Thumb2, microMIPS, PowerPC VLE), so +that performance was similar to pure 32-bit instructions but with +significant code size savings. Unfortunately, these different +generations of compressed ISAs are incompatible with each other and +with the original uncompressed ISA, leading to significant complexity +in documentation, implementations, and software tools support. + +Of the commonly used 64-bit ISAs, only PowerPC and microMIPS currently +support a compressed instruction format. It is surprising that the +most popular 64-bit ISA for mobile platforms (ARM v8) does not include +a compressed instruction format given that static code size and +dynamic instruction fetch bandwidth are important metrics. Although +static code size is not a major concern in larger systems, instruction +fetch bandwidth can be a major bottleneck in servers running +commercial workloads, which often have a large instruction working +set.
+ +Benefiting from 25 years of hindsight, RISC-V was designed to support +compressed instructions from the outset, leaving enough opcode +space for RVC to be added as a simple extension on top of the base ISA +(along with many other extensions). The philosophy of RVC is to +reduce code size for embedded applications \emph{and} to improve +performance and energy-efficiency for all applications due to fewer +misses in the instruction cache. Waterman shows that RVC fetches +25\%--30\% fewer instruction bits, which reduces instruction cache +misses by 20\%--25\%, or roughly the same performance impact as +doubling the instruction cache size~\cite{waterman-ms}. +\end{commentary} + + +\section{Compressed Instruction Formats} + +Table~\ref{formats} shows the eight compressed instruction +formats. CR, CI, and CSS can use any of the 32 RVI registers, but CIW, +CL, CS, and CB are limited to just 8 of them. Table~\ref{registers} +lists these popular registers, which correspond to registers {\tt x8} +to {\tt x15}. Note that there is a +separate version of load and store instructions that use the stack +pointer as the base address register, since saving to and restoring +from the stack are so prevalent, and that they use the CI and CSS +formats to allow access to all 32 data registers. CIW supplies an +8-bit immediate for the ADDI4SPN instruction. + +\begin{commentary} +The RISC-V ABI was changed to make the frequently used registers map +to registers {\tt x8}--{\tt x15}. This simplifies the decompression +decoder by having a contiguous naturally aligned set of register +numbers, and is also compatible with the RV32E subset base +specification, which only has 16 integer registers. +\end{commentary} + +Compressed register-based floating-point loads and stores also use the +CL and CS formats respectively, with the eight registers mapping to +{\tt f8} to {\tt f15}.
+ +\begin{commentary} +The standard RISC-V calling convention maps the most frequently used +floating-point registers to registers {\tt f8} to {\tt f15}, which +allows the same register decompression decoding as for integer +register numbers. +\end{commentary} + +The formats were designed to keep bits for the two register source +specifiers in the same place in all instructions, while the +destination register field can move. When the full 5-bit destination +register specifier is present, it is in the same place as in the +32-bit RISC-V encoding. Where immediates are +sign-extended, the sign-extension is always from bit 12. Immediate +fields have been scrambled, as in the base specification, to reduce +the number of immediate muxes required. + +\begin{commentary} +The immediate fields are scrambled in the instruction formats instead +of in sequential order so that as many bits as possible are in the +same position in every instruction, thereby simplifying +implementations. For example, immediate bits 17--10 are always sourced from +the same instruction bit positions. Five other immediate bits (5, 4, +3, 1, and 0) have just two source instruction bits, while four (9, 7, +6, and 2) have three sources and one (8) has four sources. +\end{commentary} + +For many RVC instructions, zero-valued immediates are disallowed and +{\tt x0} is not a valid 5-bit register specifier. These restrictions +free up encoding space for other instructions requiring fewer operand +bits.
+ +\begin{table}[h] +{ +\begin{small} +\begin{center} +\begin{tabular}{c c p{0in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}} +& & & & & & & & & \\ +Format & Meaning & +\instbit{15} & +\instbit{14} & +\instbit{13} & +\multicolumn{1}{c}{\instbit{12}} & +\instbit{11} & +\instbit{10} & +\instbit{9} & +\instbit{8} & +\instbit{7} & +\instbit{6} & +\multicolumn{1}{r}{\instbit{5}} & +\instbit{4} & +\instbit{3} & +\instbit{2} & +\instbit{1} & +\instbit{0} \\ +\cline{3-18} + +CR & Register & +\multicolumn{4}{|c|}{funct4} & +\multicolumn{5}{c|}{rd/rs1} & +\multicolumn{5}{c|}{rs2} & +\multicolumn{2}{c|}{op} \\ +\cline{3-18} + +CI & Immediate & +\multicolumn{3}{|c|}{funct3} & +\multicolumn{1}{c|}{imm} & +\multicolumn{5}{c|}{rd/rs1} & +\multicolumn{5}{c|}{imm} & +\multicolumn{2}{c|}{op} \\ +\cline{3-18} + +CSS & Stack-relative Store & +\multicolumn{3}{|c|}{funct3} & +\multicolumn{6}{c|}{imm} & +\multicolumn{5}{c|}{rs2} & +\multicolumn{2}{c|}{op} \\ +\cline{3-18} + +CIW & Wide Immediate & +\multicolumn{3}{|c|}{funct3} & +\multicolumn{8}{c|}{imm} & +\multicolumn{3}{c|}{rd$'$} & +\multicolumn{2}{c|}{op} \\ +\cline{3-18} + +CL & Load & +\multicolumn{3}{|c|}{funct3} & +\multicolumn{3}{c|}{imm} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{2}{c|}{imm} & +\multicolumn{3}{c|}{rd$'$} & +\multicolumn{2}{c|}{op} \\ +\cline{3-18} + +CS & Store & +\multicolumn{3}{|c|}{funct3} & +\multicolumn{3}{c|}{imm} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{2}{c|}{imm} & +\multicolumn{3}{c|}{rs2$'$} & +\multicolumn{2}{c|}{op} \\ +\cline{3-18} + +CB & Branch & +\multicolumn{3}{|c|}{funct3} & +\multicolumn{3}{c|}{offset} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{5}{c|}{offset} & +\multicolumn{2}{c|}{op} \\ +\cline{3-18} + +CJ & Jump & +\multicolumn{3}{|c|}{funct3} & +\multicolumn{11}{c|}{jump target} & +\multicolumn{2}{c|}{op} \\ +\cline{3-18} + +\end{tabular} +\end{center} +\end{small} +} 
+\caption{Compressed 16-bit RVC instruction formats.} +\label{formats} +\end{table} + + +\begin{table}[H] +{ +\begin{center} +\begin{tabular}{l|c|c|c|c|c|c|c|c|} +\cline{2-9} +RVC Register Number & 000 & 001 & 010 & 011 & 100 & 101 & 110 & 111 +\\ \cline{2-9} +Integer Register Number & {\tt x8} & {\tt x9} & {\tt x10} & {\tt x11} & {\tt x12} & {\tt x13} & {\tt x14} & {\tt x15} \\ \cline{2-9} +Integer Register ABI Name & {\tt s0} & {\tt s1} & {\tt a0} & {\tt a1} & {\tt a2} & {\tt a3} & {\tt a4} & {\tt a5} \\ \cline{2-9} +Floating-Point Register Number & {\tt f8} & {\tt f9} & {\tt f10} & {\tt f11} & {\tt f12} & {\tt f13} & {\tt f14} & {\tt f15} \\ \cline{2-9} +Floating-Point Register ABI Name & {\tt fs0} & {\tt fs1} & {\tt fa0} & {\tt fa1} & {\tt fa2} & {\tt fa3} & {\tt fa4} & {\tt fa5} \\ \cline{2-9} +\end{tabular} +\end{center} +} +\caption{Registers specified by the three-bit rs1', rs2', and rd' fields of the CIW, CL, CS, and CB formats.} +\label{registers} +\end{table} + +\section{Load and Store Instructions} + +To increase the reach of 16-bit instructions, data-transfer +instructions use zero-extended immediates that are scaled by the size +of the data in bytes: $\times$4 for words, $\times$8 for double words, +and $\times$16 for quad words. + +RVC provides two variants of loads and stores. One uses the ABI stack +pointer, {\tt x2}, as the base address and can target any data register. The +other can reference one of 8 base address registers and one of 8 data +registers. 
+ +\subsection*{Stack-Pointer-Based Loads and Stores} + +\begin{center} +\begin{tabular}{S@{}W@{}T@{}T@{}Y} +\\ +\instbitrange{15}{13} & +\multicolumn{1}{c}{\instbit{12}} & +\instbitrange{11}{7} & +\instbitrange{6}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct3} & +\multicolumn{1}{c|}{imm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{imm} & +\multicolumn{1}{c|}{op} \\ +\hline +3 & 1 & 5 & 5 & 2 \\ +C.LWSP & offset[5] & dest$\neq$0 & offset[4:2$\vert$7:6] & C2 \\ +C.LDSP & offset[5] & dest$\neq$0 & offset[4:3$\vert$8:6] & C2 \\ +C.LQSP & offset[5] & dest$\neq$0 & offset[4$\vert$9:6] & C2 \\ +C.FLWSP& offset[5] & dest & offset[4:2$\vert$7:6] & C2 \\ +C.FLDSP& offset[5] & dest & offset[4:3$\vert$8:6] & C2 \\ +\end{tabular} +\end{center} +These instructions use the CI format. + +C.LWSP loads a 32-bit value from memory into register {\em rd}. It computes +an effective address by adding the {\em zero}-extended offset, scaled by 4, to +the stack pointer, {\tt x2}. It expands to {\tt lw rd, offset[7:2](x2)}. + +C.LDSP is an RV64C/RV128C-only instruction that loads a 64-bit value from memory into +register {\em rd}. It computes its effective address by adding the +zero-extended offset, scaled by 8, to the stack pointer, {\tt x2}. +It expands to {\tt ld rd, offset[8:3](x2)}. + +C.LQSP is an RV128C-only instruction that loads a 128-bit value from memory +into register {\em rd}. It computes its effective address by adding the +zero-extended offset, scaled by 16, to the stack pointer, {\tt x2}. +It expands to {\tt lq rd, offset[9:4](x2)}. + +C.FLWSP is an RV32FC-only instruction that loads a single-precision +floating-point value from memory into floating-point register {\em rd}. It +computes its effective address by adding the {\em zero}-extended offset, +scaled by 4, to the stack pointer, {\tt x2}. It expands to {\tt flw rd, +offset[7:2](x2)}. 
+ +C.FLDSP is an RV32DC/RV64DC-only instruction that loads a double-precision +floating-point value from memory into floating-point register {\em rd}. It +computes its effective address by adding the {\em zero}-extended offset, +scaled by 8, to the stack pointer, {\tt x2}. It expands to {\tt fld rd, +offset[8:3](x2)}. + +\begin{center} +\begin{tabular}{S@{}M@{}T@{}Y} +\\ +\instbitrange{15}{13} & +\instbitrange{12}{7} & +\instbitrange{6}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct3} & +\multicolumn{1}{c|}{imm} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{op} \\ +\hline +3 & 6 & 5 & 2 \\ +C.SWSP & offset[5:2$\vert$7:6] & src & C2 \\ +C.SDSP & offset[5:3$\vert$8:6] & src & C2 \\ +C.SQSP & offset[5:4$\vert$9:6] & src & C2 \\ +C.FSWSP& offset[5:2$\vert$7:6] & src & C2 \\ +C.FSDSP& offset[5:3$\vert$8:6] & src & C2 \\ +\end{tabular} +\end{center} +These instructions use the CSS format. + +C.SWSP stores a 32-bit value in register {\em rs2} to memory. It computes +an effective address by adding the {\em zero}-extended offset, scaled by 4, to +the stack pointer, {\tt x2}. +It expands to {\tt sw rs2, offset[7:2](x2)}. + +C.SDSP is an RV64C/RV128C-only instruction that stores a 64-bit value in register +{\em rs2} to memory. It computes an effective address by adding the {\em +zero}-extended offset, scaled by 8, to the stack pointer, {\tt x2}. +It expands to {\tt sd rs2, offset[8:3](x2)}. + +C.SQSP is an RV128C-only instruction that stores a 128-bit value in register +{\em rs2} to memory. It computes an effective address by adding the {\em +zero}-extended offset, scaled by 16, to the stack pointer, {\tt x2}. +It expands to {\tt sq rs2, offset[9:4](x2)}. + +C.FSWSP is an RV32FC-only instruction that stores a single-precision +floating-point value in floating-point register {\em rs2} to memory. It +computes an effective address by adding the {\em zero}-extended offset, scaled +by 4, to the stack pointer, {\tt x2}. 
It expands to {\tt fsw rs2, +offset[7:2](x2)}. + +C.FSDSP is an RV32DC/RV64DC-only instruction that stores a double-precision +floating-point value in floating-point register {\em rs2} to memory. It +computes an effective address by adding the {\em zero}-extended offset, scaled +by 8, to the stack pointer, {\tt x2}. It expands to {\tt fsd rs2, +offset[8:3](x2)}. + +\begin{commentary} +Register save/restore code at function entry/exit represents a +significant portion of static code size. The stack-pointer-based +compressed loads and stores in RVC are effective at reducing the +save/restore static code size by a factor of 2 while improving +performance by reducing dynamic instruction bandwidth. + +A common mechanism used in other ISAs to further reduce +save/restore code size is load-multiple and store-multiple +instructions. We considered adopting these for RISC-V but noted the +following drawbacks to these instructions: +\begin{itemize} +\item These instructions complicate processor implementations. +\item For virtual memory systems, some data accesses could be + resident in physical memory and some could not, which requires a + new restart mechanism for partially executed instructions. +\item Unlike the rest of the RVC instructions, there is no IFD + equivalent to Load Multiple and Store Multiple. +\item Unlike the rest of the RVC instructions, the compiler would + have to be aware of these instructions to both generate the + instructions and to allocate registers in an order to maximize + the chances of them being saved and restored, since they would + be saved and restored in sequential order. +\item Simple microarchitectural implementations will constrain how + other instructions can be scheduled around the load and store + multiple instructions, leading to a potential performance loss. +\item The desire for sequential register allocation might conflict with + the featured registers selected for the CIW, CL, CS, and CB formats.
+\end{itemize} +Furthermore, much of the gains can be realized in software by replacing +prologue and epilogue code with subroutine calls to common +prologue and epilogue code, a technique described in +Section 5.6 of~\cite{waterman-phd}. + +While reasonable architects might come to different conclusions, we +decided to omit load and store multiple and instead use the +software-only approach of calling save/restore millicode routines to +attain the greatest code size reduction. +\end{commentary} + +\subsection*{Register-Based Loads and Stores} + +\begin{center} +\begin{tabular}{S@{}S@{}S@{}Y@{}S@{}Y} +\\ +\instbitrange{15}{13} & +\instbitrange{12}{10} & +\instbitrange{9}{7} & +\instbitrange{6}{5} & +\instbitrange{4}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct3} & +\multicolumn{1}{c|}{imm} & +\multicolumn{1}{c|}{rs1$'$} & +\multicolumn{1}{c|}{imm} & +\multicolumn{1}{c|}{rd$'$} & +\multicolumn{1}{c|}{op} \\ +\hline +3 & 3 & 3 & 2 & 3 & 2 \\ +C.LW & offset[5:3] & base & offset[2$\vert$6] & dest & C0 \\ +C.LD & offset[5:3] & base & offset[7:6] & dest & C0 \\ +C.LQ & offset[5$\vert$4$\vert$8] & base & offset[7:6] & dest & C0 \\ +C.FLW& offset[5:3] & base & offset[2$\vert$6] & dest & C0 \\ +C.FLD& offset[5:3] & base & offset[7:6] & dest & C0 \\ +\end{tabular} +\end{center} +These instructions use the CL format. + +C.LW loads a 32-bit value from memory into register {\em rd$'$}. It computes +an effective address by adding the {\em zero}-extended offset, scaled by 4, to +the base address in register {\em rs1$'$}. +It expands to {\tt lw rd$'$, offset[6:2](rs1$'$)}. + +C.LD is an RV64C/RV128C-only instruction that loads a 64-bit value from memory into +register {\em rd$'$}. It computes an effective address by adding the {\em +zero}-extended offset, scaled by 8, to the base address in register {\em +rs1$'$}. +It expands to {\tt ld rd$'$, offset[7:3](rs1$'$)}. 
+ +C.LQ is an RV128C-only instruction that loads a 128-bit value from memory into +register {\em rd$'$}. It computes an effective address by adding the {\em +zero}-extended offset, scaled by 16, to the base address in register {\em +rs1$'$}. +It expands to {\tt lq rd$'$, offset[8:4](rs1$'$)}. + +C.FLW is an RV32FC-only instruction that loads a single-precision +floating-point value from memory into floating-point register {\em rd$'$}. It +computes an effective address by adding the {\em zero}-extended offset, scaled +by 4, to the base address in register {\em rs1$'$}. It expands to {\tt flw +rd$'$, offset[6:2](rs1$'$)}. + +C.FLD is an RV32DC/RV64DC-only instruction that loads a double-precision +floating-point value from memory into floating-point register {\em rd$'$}. It +computes an effective address by adding the {\em zero}-extended offset, scaled +by 8, to the base address in register {\em rs1$'$}. It expands to {\tt fld +rd$'$, offset[7:3](rs1$'$)}. + +\begin{center} +\begin{tabular}{S@{}S@{}S@{}Y@{}S@{}Y} +\\ +\instbitrange{15}{13} & +\instbitrange{12}{10} & +\instbitrange{9}{7} & +\instbitrange{6}{5} & +\instbitrange{4}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct3} & +\multicolumn{1}{c|}{imm} & +\multicolumn{1}{c|}{rs1$'$} & +\multicolumn{1}{c|}{imm} & +\multicolumn{1}{c|}{rs2$'$} & +\multicolumn{1}{c|}{op} \\ +\hline +3 & 3 & 3 & 2 & 3 & 2 \\ +C.SW & offset[5:3] & base & offset[2$\vert$6] & src & C0 \\ +C.SD & offset[5:3] & base & offset[7:6] & src & C0 \\ +C.SQ & offset[5$\vert$4$\vert$8] & base & offset[7:6] & src & C0 \\ +C.FSW& offset[5:3] & base & offset[2$\vert$6] & src & C0 \\ +C.FSD& offset[5:3] & base & offset[7:6] & src & C0 \\ +\end{tabular} +\end{center} +These instructions use the CS format. + +C.SW stores a 32-bit value in register {\em rs2$'$} to memory. It computes an +effective address by adding the {\em zero}-extended offset, scaled by 4, to +the base address in register {\em rs1$'$}. 
+It expands to {\tt sw rs2$'$, offset[6:2](rs1$'$)}. + +C.SD is an RV64C/RV128C-only instruction that stores a 64-bit value in +register {\em rs2$'$} to memory. It computes an effective address by adding +the {\em zero}-extended offset, scaled by 8, to the base address in register +{\em rs1$'$}. +It expands to {\tt sd rs2$'$, offset[7:3](rs1$'$)}. + +C.SQ is an RV128C-only instruction that stores a 128-bit value in register +{\em rs2$'$} to memory. It computes an effective address by adding the {\em +zero}-extended offset, scaled by 16, to the base address in register {\em +rs1$'$}. +It expands to {\tt sq rs2$'$, offset[8:4](rs1$'$)}. + +C.FSW is an RV32FC-only instruction that stores a single-precision +floating-point value in floating-point register {\em rs2$'$} to memory. It +computes an effective address by adding the {\em zero}-extended offset, scaled +by 4, to the base address in register {\em rs1$'$}. It expands to {\tt fsw +rs2$'$, offset[6:2](rs1$'$)}. + +C.FSD is an RV32DC/RV64DC-only instruction that stores a double-precision +floating-point value in floating-point register {\em rs2$'$} to memory. It +computes an effective address by adding the {\em zero}-extended offset, scaled +by 8, to the base address in register {\em rs1$'$}. It expands to {\tt fsd +rs2$'$, offset[7:3](rs1$'$)}. + +\section{Control Transfer Instructions} + +RVC provides unconditional jump instructions and conditional branch +instructions. As with base RVI instructions, the offsets of all RVC +control transfer instructions are in multiples of 2 bytes.
+ +\begin{center} +\begin{tabular}{S@{}L@{}Y} +\\ +\instbitrange{15}{13} & +\instbitrange{12}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct3} & +\multicolumn{1}{c|}{imm} & +\multicolumn{1}{c|}{op} \\ +\hline +3 & 11 & 2 \\ +C.J & offset[11$\vert$4$\vert$9:8$\vert$10$\vert$6$\vert$7$\vert$3:1$\vert$5] & C1 \\ +C.JAL & offset[11$\vert$4$\vert$9:8$\vert$10$\vert$6$\vert$7$\vert$3:1$\vert$5] & C1 \\ +\end{tabular} +\end{center} +These instructions use the CJ format. + +C.J performs an unconditional control transfer. The offset is sign-extended and +added to the {\tt pc} to form the jump target address. C.J can therefore target +a $\pm$\wunits{2}{KiB} range. C.J expands to {\tt jal x0, offset[11:1]}. + +C.JAL is an RV32C-only instruction that performs the same operation as C.J, +but additionally writes the address of the instruction following the jump +({\tt pc}+2) to the link register, {\tt x1}. C.JAL expands to {\tt jal x1, +offset[11:1]}. + +\begin{center} +\begin{tabular}{E@{}T@{}T@{}Y} +\\ +\instbitrange{15}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct4} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{op} \\ +\hline +4 & 5 & 5 & 2 \\ +C.JR & src$\neq$0 & 0 & C2 \\ +C.JALR & src$\neq$0 & 0 & C2 \\ +\end{tabular} +\end{center} +These instructions use the CR format. + +C.JR (jump register) performs an unconditional control transfer to +the address in register {\em rs1}. C.JR expands to {\tt jalr x0, rs1, 0}. + +C.JALR (jump and link register) performs the same operation as C.JR, +but additionally writes the address of the instruction following the +jump ({\tt pc}+2) to the link register, {\tt x1}. C.JALR expands to +{\tt jalr x1, rs1, 0}. 
+ +\begin{commentary} +Strictly speaking, C.JALR does not expand exactly to a base RVI +instruction as the value added to the PC to form the link address is 2 +rather than 4 as in the base ISA, but supporting both offsets of 2 and +4 bytes is only a very minor change to the base microarchitecture. +\end{commentary} + +\begin{center} +\begin{tabular}{S@{}S@{}S@{}T@{}Y} +\\ +\instbitrange{15}{13} & +\instbitrange{12}{10} & +\instbitrange{9}{7} & +\instbitrange{6}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct3} & +\multicolumn{1}{c|}{imm} & +\multicolumn{1}{c|}{rs1$'$} & +\multicolumn{1}{c|}{imm} & +\multicolumn{1}{c|}{op} \\ +\hline +3 & 3 & 3 & 5 & 2 \\ +C.BEQZ & offset[8$\vert$4:3] & src & offset[7:6$\vert$2:1$\vert$5] & C1 \\ +C.BNEZ & offset[8$\vert$4:3] & src & offset[7:6$\vert$2:1$\vert$5] & C1 \\ +\end{tabular} +\end{center} +These instructions use the CB format. + +C.BEQZ performs conditional control transfers. The offset is sign-extended +and added to the {\tt pc} to form the branch target address. It can +therefore target a $\pm$\wunits{256}{B} range. C.BEQZ takes the branch if the +value in register {\em rs1$'$} is zero. It expands to {\tt beq rs1$'$, x0, +offset[8:1]}. + +C.BNEZ is defined analogously, but it takes the branch if {\em rs1$'$} contains +a nonzero value. It expands to {\tt bne rs1$'$, x0, offset[8:1]}. + +\section{Integer Computational Instructions} + +RVC provides several instructions for integer arithmetic and constant generation. + +\subsection*{Integer Constant-Generation Instructions} + +The two constant-generation instructions both use the CI instruction +format and can target any integer register. 
+ +\vspace{-0.4in} +\begin{center} +\begin{tabular}{S@{}W@{}T@{}T@{}Y} +\\ +\instbitrange{15}{13} & +\multicolumn{1}{c}{\instbit{12}} & +\instbitrange{11}{7} & +\instbitrange{6}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct3} & +\multicolumn{1}{c|}{imm[5]} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{op} \\ +\hline +3 & 1 & 5 & 5 & 2 \\ +C.LI & imm[5] & dest$\neq$0 & imm[4:0] & C1 \\ +C.LUI & nzimm[17] & $\textrm{dest}{\neq}{\left\{0,2\right\}}$ & nzimm[16:12] & C1 \\ +\end{tabular} +\end{center} +C.LI loads the sign-extended 6-bit immediate, {\em imm}, into +register {\em rd}. C.LI is only valid when {\em rd}$\neq${\tt x0}. +C.LI expands into {\tt addi rd, x0, imm[5:0]}. + +C.LUI loads the non-zero 6-bit immediate field into bits 17--12 of the +destination register, clears the bottom 12 bits, and sign-extends bit +17 into all higher bits of the destination. C.LUI is only valid when +$\textit{rd}{\neq}{\left\{\texttt{x0},\texttt{x2}\right\}}$, +and when the immediate is not equal to zero. +C.LUI expands into {\tt lui rd, nzimm[17:12]}. + +\subsection*{Integer Register-Immediate Operations} + +These integer register-immediate operations are encoded in the CI +format and perform operations on any non-{\tt x0} integer register and +a 6-bit immediate. The immediate cannot be zero. 
+ +\vspace{-0.4in} +\begin{center} +\begin{tabular}{S@{}W@{}T@{}T@{}Y} +\\ +\instbitrange{15}{13} & +\multicolumn{1}{c}{\instbit{12}} & +\instbitrange{11}{7} & +\instbitrange{6}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct3} & +\multicolumn{1}{c|}{imm[5]} & +\multicolumn{1}{c|}{rd/rs1} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{op} \\ +\hline +3 & 1 & 5 & 5 & 2 \\ +C.ADDI & nzimm[5] & dest & nzimm[4:0] & C1 \\ +C.ADDIW & imm[5] & dest$\neq$0 & imm[4:0] & C1 \\ +C.ADDI16SP & nzimm[9] & 2 & nzimm[4$\vert$6$\vert$8:7$\vert$5] & C1 \\ +\end{tabular} +\end{center} + +C.ADDI adds the non-zero sign-extended 6-bit immediate to the value in +register {\em rd} then writes the result to {\em rd}. C.ADDI expands +into {\tt addi rd, rd, nzimm[5:0]}. + +C.ADDIW is an RV64C/RV128C-only instruction that performs the same +computation but produces a 32-bit result, then sign-extends the result to +64 bits. C.ADDIW expands into {\tt addiw rd, rd, imm[5:0]}. The +immediate can be zero for C.ADDIW, where this corresponds to {\tt +sext.w rd}. + +C.ADDI16SP shares the opcode with C.LUI, but has a destination field +of {\tt x2}. C.ADDI16SP adds the non-zero sign-extended 6-bit immediate to +the value in the stack pointer ({\tt sp}={\tt x2}), where the +immediate is scaled to represent multiples of 16 in the range +(-512,496). C.ADDI16SP is used to adjust the stack pointer in procedure +prologues and epilogues. It expands into {\tt addi x2, x2, nzimm[9:4]}. + +\begin{commentary} +In the standard RISC-V calling convention, the stack pointer {\tt sp} +is always 16-byte aligned.
+\end{commentary} + +\begin{center} +\begin{tabular}{@{}S@{}K@{}S@{}Y} +\\ +\instbitrange{15}{13} & +\instbitrange{12}{5} & +\instbitrange{4}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct3} & +\multicolumn{1}{c|}{imm} & +\multicolumn{1}{c|}{rd$'$} & +\multicolumn{1}{c|}{op} \\ +\hline +3 & 8 & 3 & 2 \\ +C.ADDI4SPN & zimm[5:4$\vert$9:6$\vert$2$\vert$3] & dest & C0 \\ +\end{tabular} +\end{center} + +C.ADDI4SPN is a CIW-format RV32C/RV64C-only instruction that adds a +{\em zero}-extended non-zero immediate, scaled by 4, to the stack pointer, +{\tt x2}, and writes the result to {\tt rd$'$}. This instruction is used +to generate pointers to stack-allocated variables, and expands to +{\tt addi rd$'$, x2, zimm[9:2]}. + + +\vspace{-0.4in} +\begin{center} +\begin{tabular}{S@{}W@{}T@{}T@{}Y} +\\ +\instbitrange{15}{13} & +\multicolumn{1}{c}{\instbit{12}} & +\instbitrange{11}{7} & +\instbitrange{6}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct3} & +\multicolumn{1}{c|}{shamt[5]} & +\multicolumn{1}{c|}{rd/rs1} & +\multicolumn{1}{c|}{shamt[4:0]} & +\multicolumn{1}{c|}{op} \\ +\hline +3 & 1 & 5 & 5 & 2 \\ +C.SLLI & shamt[5] & dest$\neq$0 & shamt[4:0] & C2 \\ +\end{tabular} +\end{center} + +C.SLLI is a CI-format instruction that performs a logical left shift +of the value in register {\em rd} then writes the result to {\em rd}. +The shift amount is encoded in the {\em shamt} field, where {\em + shamt[5]} must be zero for RV32C. For RV32C and RV64C, the shift +amount must be non-zero. For RV128C, a shift amount of zero is used +to encode a shift of 64. C.SLLI expands into {\tt slli rd, rd, + shamt[5:0]}, except for RV128C with {\tt shamt=0}, which expands to +{\tt slli rd, rd, 64}. 
+ +\vspace{-0.4in} +\begin{center} +\begin{tabular}{S@{}W@{}Y@{}S@{}T@{}Y} +\\ +\instbitrange{15}{13} & +\multicolumn{1}{c}{\instbit{12}} & +\instbitrange{11}{10} & +\instbitrange{9}{7} & +\instbitrange{6}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct3} & +\multicolumn{1}{c|}{shamt[5]} & +\multicolumn{1}{|c|}{funct2} & +\multicolumn{1}{c|}{rd$'$/rs1$'$} & +\multicolumn{1}{c|}{shamt[4:0]} & +\multicolumn{1}{c|}{op} \\ +\hline +3 & 1 & 2 & 3 & 5 & 2 \\ +C.SRLI & shamt[5] & C.SRLI & dest & shamt[4:0] & C1 \\ +C.SRAI & shamt[5] & C.SRAI & dest & shamt[4:0] & C1 \\ +\end{tabular} +\end{center} + +C.SRLI is a CB-format instruction that performs a logical right shift +of the value in register {\em rd$'$} then writes the result to {\em rd$'$}. +The shift amount is encoded in the {\em shamt} field, where {\em + shamt[5]} must be zero for RV32C. For RV32C and RV64C, the shift +amount must be non-zero. For RV128C, a shift amount of zero is used +to encode a shift of 64. Furthermore, the shift amount is sign-extended +for RV128C, and so the legal shift amounts are 1--31, 64, and 96--127. +C.SRLI expands into {\tt srli rd$'$, rd$'$, shamt[5:0]}, +except for RV128C with {\tt shamt=0}, which expands to +{\tt srli rd$'$, rd$'$, 64}. + +C.SRAI is defined analogously to C.SRLI, but instead performs an arithmetic +right shift. +C.SRAI expands to {\tt srai rd$'$, rd$'$, shamt[5:0]}. + +\begin{commentary} +Left shifts are usually more frequent than right shifts, as left +shifts are frequently used to scale address values. Right shifts have +therefore been granted less encoding space and are placed in an +encoding quadrant where all other immediates are sign-extended. For +RV128, the decision was made to have the 6-bit shift-amount immediate +also be sign-extended. Apart from reducing the decode complexity, we +believe right-shift amounts of 96--127 will be more useful than 64--95, +to allow extraction of tags located in the high portions of 128-bit +address pointers. 
We note that RV128C will not be frozen at the same +point as RV32C and RV64C, to allow evaluation of typical usage of +128-bit address-space codes. +\end{commentary} + +\begin{center} +\begin{tabular}{S@{}W@{}Y@{}S@{}T@{}Y} +\\ +\instbitrange{15}{13} & +\multicolumn{1}{c}{\instbit{12}} & +\instbitrange{11}{10} & +\instbitrange{9}{7} & +\instbitrange{6}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct3} & +\multicolumn{1}{c|}{imm[5]} & +\multicolumn{1}{|c|}{funct2} & +\multicolumn{1}{c|}{rd$'$/rs1$'$} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{op} \\ +\hline +3 & 1 & 2 & 3 & 5 & 2 \\ +C.ANDI & imm[5] & C.ANDI & dest & imm[4:0] & C1 \\ +\end{tabular} +\end{center} + +C.ANDI is a CB-format instruction that computes the bitwise AND of +the value in register {\em rd$'$} and the sign-extended 6-bit immediate, +then writes the result to {\em rd$'$}. +C.ANDI expands to {\tt andi rd$'$, rd$'$, imm[5:0]}. + +\subsection*{Integer Register-Register Operations} +\vspace{-0.4in} +\begin{center} +\begin{tabular}{E@{}T@{}T@{}Y} +\\ +\instbitrange{15}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct4} & +\multicolumn{1}{c|}{rd/rs1} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{op} \\ +\hline +4 & 5 & 5 & 2 \\ +C.MV & dest$\neq$0 & src$\neq$0 & C2 \\ +C.ADD & dest$\neq$0 & src$\neq$0 & C2 \\ +\end{tabular} +\end{center} +These instructions use the CR format. + +C.MV copies the value in register {\em rs2} into register {\em rd}. C.MV +expands into {\tt add rd, x0, rs2}. + +C.ADD adds the values in registers {\em rd} and {\em rs2} and writes the +result to register {\em rd}. C.ADD expands into {\tt add rd, rd, rs2}.
+ +\vspace{-0.4in} +\begin{center} +\begin{tabular}{M@{}S@{}Y@{}S@{}Y} +\\ +\instbitrange{15}{10} & +\instbitrange{9}{7} & +\instbitrange{6}{5} & +\instbitrange{4}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct6} & +\multicolumn{1}{c|}{rd$'$/rs1$'$} & +\multicolumn{1}{c|}{funct} & +\multicolumn{1}{c|}{rs2$'$} & +\multicolumn{1}{c|}{op} \\ +\hline +6 & 3 & 2 & 3 & 2 \\ +C.AND & dest & C.AND & src & C1 \\ +C.OR & dest & C.OR & src & C1 \\ +C.XOR & dest & C.XOR & src & C1 \\ +C.SUB & dest & C.SUB & src & C1 \\ +C.ADDW & dest & C.ADDW & src & C1 \\ +C.SUBW & dest & C.SUBW & src & C1 \\ +\end{tabular} +\end{center} + +These instructions use the CS format. + +C.AND computes the bitwise AND of the values in registers {\em rd$'$} +and {\em rs2$'$}, then writes the result to register {\em rd$'$}. +C.AND expands into {\tt and rd$'$, rd$'$, rs2$'$}. + +C.OR computes the bitwise OR of the values in registers {\em rd$'$} +and {\em rs2$'$}, then writes the result to register {\em rd$'$}. +C.OR expands into {\tt or rd$'$, rd$'$, rs2$'$}. + +C.XOR computes the bitwise XOR of the values in registers {\em rd$'$} +and {\em rs2$'$}, then writes the result to register {\em rd$'$}. +C.XOR expands into {\tt xor rd$'$, rd$'$, rs2$'$}. + +C.SUB subtracts the value in register {\em rs2$'$} from the value in +register {\em rd$'$}, then writes the result to register {\em rd$'$}. +C.SUB expands into {\tt sub rd$'$, rd$'$, rs2$'$}. + +C.ADDW is an RV64C/RV128C-only instruction that adds the values in +registers {\em rd$'$} and {\em rs2$'$}, then sign-extends the lower +32 bits of the sum before writing the result to register {\em rd$'$}. +C.ADDW expands into {\tt addw rd$'$, rd$'$, rs2$'$}. + +C.SUBW is an RV64C/RV128C-only instruction that subtracts the value in +register {\em rs2$'$} from the value in register {\em rd$'$}, then +sign-extends the lower 32 bits of the difference before writing the result +to register {\em rd$'$}. 
C.SUBW expands into {\tt subw rd$'$, rd$'$, rs2$'$}. + +\begin{commentary} +This group of six instructions do not provide large savings +individually, but do not occupy much encoding space and are +straightforward to implement, and as a group provide a worthwhile +improvement in static and dynamic compression. +\end{commentary} + +\subsection*{Defined Illegal Instruction} +\vspace{-0.4in} +\begin{center} +\begin{tabular}{SW@{}T@{}T@{}Y} +\\ +\instbitrange{15}{13} & +\multicolumn{1}{c}{\instbit{12}} & +\instbitrange{11}{7} & +\instbitrange{6}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{0} & +\multicolumn{1}{c|}{0} & +\multicolumn{1}{c|}{0} & +\multicolumn{1}{c|}{0} & +\multicolumn{1}{c|}{0} \\ +\hline +3 & 1 & 5 & 5 & 2 \\ +0 & 0 & 0 & 0 & 0 \\ +\end{tabular} +\end{center} + +A 16-bit instruction with all bits zero is permanently reserved as an +illegal instruction. +\begin{commentary} +We reserve all-zero instructions to be illegal instructions to help +trap attempts to execute zero-ed or non-existent portions of the +memory space. The all-zero value should not be redefined in any +non-standard extension. Similarly, we reserve instructions with all +bits set to 1 (corresponding to very long instructions in the RISC-V +variable-length encoding scheme) as illegal to capture another common +value seen in non-existent memory regions. +\end{commentary} + +\subsection*{NOP Instruction} +\vspace{-0.4in} +\begin{center} +\begin{tabular}{SW@{}T@{}T@{}Y} +\\ +\instbitrange{15}{13} & +\multicolumn{1}{c}{\instbit{12}} & +\instbitrange{11}{7} & +\instbitrange{6}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct3} & +\multicolumn{1}{c|}{imm[5]} & +\multicolumn{1}{c|}{rd/rs1} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{op} \\ +\hline +3 & 1 & 5 & 5 & 2 \\ +C.NOP & 0 & 0 & 0 & C1 \\ +\end{tabular} +\end{center} + +C.NOP is a CI-format instruction that does not change any user-visible state, +except for advancing the {\tt pc}. 
C.NOP is encoded as {\tt c.addi x0, 0} and +so expands to {\tt addi x0, x0, 0}. + +\subsection*{Breakpoint Instruction} +\vspace{-0.4in} +\begin{center} +\begin{tabular}{E@{}U@{}Y} +\\ +\instbitrange{15}{12} & +\instbitrange{11}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{funct4} & +\multicolumn{1}{c|}{0} & +\multicolumn{1}{c|}{op} \\ +\hline +4 & 10 & 2 \\ +C.EBREAK & 0 & C2 \\ +\end{tabular} +\end{center} + +Debuggers can use the C.EBREAK instruction, which expands to {\tt ebreak}, +to cause control to be transferred back to the debugging environment. +C.EBREAK shares the opcode with the C.ADD instruction, but with {\em + rd} and {\em rs2} both zero, thus can also use the CR format. + +\section{Usage of C Instructions in LR/SC Sequences} + +On implementations that support the C extension, compressed forms of +the I instructions permitted inside LR/SC sequences can be used while +retaining the guarantee of eventual success, as described in +Section~\ref{lrscseq}. + +\begin{commentary} +The implication is that any implementation that claims to support both +the A and C extensions must ensure that LR/SC sequences containing +valid C instructions will eventually complete. +\end{commentary} + +\clearpage + +\section{RVC Instruction Set Listings} + +Table~\ref{rvcopcodemap} shows a map of the major opcodes for RVC. +Opcodes with the lower two bits set correspond to instructions wider +than 16 bits, including those in the base ISAs. Several instructions +are only valid for certain operands; when invalid, they are marked +either {\em RES} to indicate that the opcode is reserved for future +standard extensions; {\em NSE} to indicate that the opcode is reserved +for non-standard extensions; or {\em HINT} to indicate that the opcode +is reserved for future standard microarchitectural hints. +Instructions marked {\em HINT} must execute as no-ops on +implementations for which the hint has no effect.
+ +\begin{commentary} +The HINT instructions are designed to support future addition of +microarchitectural hints that might affect performance but cannot +affect architectural state. The HINT encodings have been chosen so +that simple implementations can ignore the HINT encoding and execute +the HINT as a regular operation that does not change architectural +state. For example, C.ADD is a HINT if the destination register is +{\tt x0}, where the five-bit rs2 field encodes details of the HINT. +However, a simple implementation can simply execute the HINT as an add +to register {\tt x0}, which will have no effect. +\end{commentary} + +\input{rvc-opcode-map} + +Tables~\ref{rvc-instr-table0}--\ref{rvc-instr-table2} list the RVC instructions. +\input{rvc-instr-table} diff --git a/src/calling.tex b/src/calling.tex new file mode 100644 index 0000000..55c55e1 --- /dev/null +++ b/src/calling.tex @@ -0,0 +1,198 @@ +\chapter{Calling Convention} +\label{sec:calling} + +This chapter describes the C compiler standards for RV32 and RV64 programs +and two calling conventions: the convention for the base ISA +plus standard general extensions (RV32G/RV64G), and the soft-float +convention for implementations lacking floating-point units (e.g., RV32I/RV64I). + +\begin{commentary} +Implementations with ISA extensions might require extended calling +conventions. +\end{commentary} + +\section{C Datatypes and Alignment} + +Table~\ref{datatypes} summarizes the datatypes natively supported by +RISC-V C programs. In both RV32 and RV64 C compilers, the C type {\tt + int} is 32 bits wide. {\tt long}s and pointers, on the other hand, +are both as wide as an integer register, so in RV32, both are 32 bits +wide, while in RV64, both are 64 bits wide. Equivalently, RV32 +employs an ILP32 integer model, while RV64 is LP64.
In both RV32 and +RV64, the C type {\tt long long} is a 64-bit integer, {\tt float} is a +32-bit IEEE 754-2008 floating-point number, {\tt double} is a 64-bit +IEEE 754-2008 floating-point number, and {\tt long double} is a +128-bit IEEE floating-point number. + +The C types {\tt char} and {\tt unsigned char} are 8-bit unsigned integers and +are zero-extended when stored in a RISC-V integer register. {\tt unsigned +short} is a 16-bit unsigned integer and is zero-extended when stored in +a RISC-V integer register. {\tt signed char} is an 8-bit signed integer and +is sign-extended when stored in a RISC-V integer register, i.e. bits +(XLEN-1)..7 are all equal. {\tt short} is a 16-bit signed integer and is +sign-extended when stored in a register. + +In RV64, 32-bit types, such as {\tt int}, are stored in integer registers +as proper sign extensions of their 32-bit values; that is, bits 63..31 are all +equal. This restriction holds even for unsigned 32-bit types. + +The RV32 and RV64 C compiler and compliant software keep all of the +above datatypes naturally aligned when stored in memory. + +\vspace{0.2in} +\begin{table*}[htbp] +\begin{center} +\begin{tabular}{|l|l|r|r|} + + \hline + C type & Description & Bytes in RV32 & Bytes in RV64 \\ \hline + \tt char & Character value/byte & 1 & 1 \\ + \tt short & Short integer & 2 & 2 \\ + \tt int & Integer & 4 & 4 \\ + \tt long & Long integer & 4 & 8 \\ + \tt long long & Long long integer & 8 & 8 \\ + \tt void* & Pointer & 4 & 8 \\ + \tt float & Single-precision float & 4 & 4 \\ + \tt double & Double-precision float & 8 & 8 \\ + \tt long double & Extended-precision float & 16 & 16 \\ + \hline + + \end{tabular} +\end{center} +\caption{C compiler datatypes for base RISC-V ISA.} +\label{datatypes} +\end{table*} + + +\section{RVG Calling Convention} + +The RISC-V calling convention passes arguments in registers when +possible. 
Up to eight integer registers, {\tt a0}--{\tt a7}, +and up to eight floating-point registers, {\tt fa0}--{\tt fa7}, +are used for this purpose. + +If the arguments to a function are conceptualized as fields of a C +{\tt struct}, each with pointer alignment, the argument registers are +a shadow of the first eight pointer-words of that {\tt struct}. If +argument $i<8$ is a floating-point type, it is passed in +floating-point register {\tt fa}$i$; otherwise, it is passed in +integer register {\tt a}$i$. However, floating-point arguments +that are part of {\tt union}s or array fields of structures are passed +in integer registers. Additionally, floating-point arguments to +variadic functions (except those that are explicitly named in the +parameter list) are passed in integer registers. + +Arguments smaller than a pointer-word are passed in the least-significant bits +of argument registers. Correspondingly, sub-pointer-word arguments passed on +the stack appear in the lower addresses of a pointer-word, since RISC-V +has a little-endian memory system. + +When primitive arguments twice the size of a pointer-word are passed on the +stack, they are naturally aligned. When they are passed in the integer +registers, they reside in an aligned even-odd register pair, with the even +register holding the least-significant bits. In RV32, for example, the function +{\tt void foo(int, long long)} is passed its first argument in +{\tt a0} and its second in {\tt a2} and {\tt a3}. Nothing is passed +in {\tt a1}. + +Arguments more than twice the size of a pointer-word are passed by reference. + +The portion of the conceptual {\tt struct} that is not passed in argument +registers is passed on the stack. The stack pointer {\tt sp} points to the +first argument not passed in a register. + +Values are returned from functions in integer registers {\tt a0} and {\tt +a1} and floating-point registers {\tt fa0} and {\tt fa1}. 
Floating-point +values are returned in floating-point registers only if they are primitives or +members of a {\tt struct} consisting of only one or two floating-point values. +Other return values that fit into two pointer-words are returned in {\tt a0} +and {\tt a1}. Larger return values are passed entirely in memory; the caller +allocates this memory region and passes a pointer to it as an implicit first +parameter to the callee. + +In the standard RISC-V calling convention, the stack grows downward and the +stack pointer is always kept 16-byte aligned. + +In addition to the argument and return value registers, +seven integer registers {\tt t0}--{\tt t6} and twelve floating-point registers +{\tt ft0}--{\tt ft11} are temporary registers that are volatile across +calls and must be saved by the caller if later used. +Twelve integer registers {\tt s0}--{\tt s11} and twelve floating-point +registers {\tt fs0}--{\tt fs11} are preserved across calls and must be saved +by the callee if used. Table~\ref{regmap} indicates the role of each +integer and floating-point register in the calling convention. 
+ +\vspace{0.2in} +\begin{table*}[htbp] +\begin{center} +\begin{tabular}{|l|l|l|l|} + + \hline + Register & ABI Name & Description & Saver \\ \hline + \tt x0 & \tt zero & Hard-wired zero & --- \\ + \tt x1 & \tt ra & Return address & Caller \\ + \tt x2 & \tt sp & Stack pointer & Callee \\ + \tt x3 & \tt gp & Global pointer & --- \\ + \tt x4 & \tt tp & Thread pointer & --- \\ + {\tt x5}--{\tt 7} & {\tt t0}--{\tt 2} & Temporaries & Caller \\ + \tt x8 & {\tt s0}/\tt fp & Saved register/frame pointer & Callee \\ + \tt x9 & {\tt s1} & Saved register & Callee \\ + {\tt x10}--{\tt 11} & {\tt a0}--{\tt 1} & Function arguments/return values & Caller \\ + {\tt x12}--{\tt 17} & {\tt a2}--{\tt 7} & Function arguments & Caller \\ + {\tt x18}--{\tt 27} & {\tt s2}--{\tt 11} & Saved registers & Callee \\ + {\tt x28}--{\tt 31} & {\tt t3}--{\tt 6} & Temporaries & Caller \\ + \hline + {\tt f0}--{\tt 7} & {\tt ft0}--{\tt 7} & FP temporaries & Caller \\ + {\tt f8}--{\tt 9} & {\tt fs0}--{\tt 1} & FP saved registers & Callee \\ + {\tt f10}--{\tt 11} & {\tt fa0}--{\tt 1} & FP arguments/return values & Caller \\ + {\tt f12}--{\tt 17} & {\tt fa2}--{\tt 7} & FP arguments & Caller \\ + {\tt f18}--{\tt 27} & {\tt fs2}--{\tt 11} & FP saved registers & Callee \\ + {\tt f28}--{\tt 31} & {\tt ft8}--{\tt 11} & FP temporaries & Caller \\ + \hline + + \end{tabular} +\end{center} +\caption{RISC-V calling convention register usage.} +\label{regmap} +\end{table*} + +\section{Soft-Float Calling Convention} + +The soft-float calling convention is intended for use on RV32 and RV64 +implementations that lack floating-point hardware. It avoids all use +of instructions in the F, D, and Q standard extensions, and hence the +{\tt f} registers. + +Integral arguments are passed and returned in the same manner as the +RVG convention, and the stack discipline is the same except that the +stack is only kept aligned to XLEN/8-byte boundaries (e.g., four-byte +alignment for RV32I). 
+ +\begin{commentary} + As floating-point data on the stack will be accessed using integer + load and store instructions, there is no incentive to maintain stack + alignment at a coarse granularity in the soft-float calling + convention. The reduced stack alignment saves space in the + memory-constrained systems that might commonly use soft + floating-point. +\end{commentary} + +Floating-point arguments are passed and returned in integer registers, +using the rules for integer arguments of the same size. In RV32, for +example, the function {\tt double foo(int, double, long double)} is +passed its first argument in {\tt a0}, its second argument in {\tt a2} +and {\tt a3}, and its third argument by reference via {\tt a4}; its +result is returned in {\tt a0} and {\tt a1}. In RV64, the arguments +are passed in {\tt a0}, {\tt a1}, and the {\tt a2}-{\tt a3} pair, and +the result is returned in {\tt a0}. + +The dynamic rounding mode and accrued exception flags are accessed through +the routines provided by the C99 header {\tt fenv.h}. + +\section{RV32E Calling Convention} + +RV32E uses a subset of the Soft-Float calling convention. As only 16 +integer registers {\tt x0}--{\tt x15} are present, there are only six +argument registers ({\tt x10}--{\tt x15}), two saved registers ({\tt + x8}--{\tt x9}), and three temporary registers ({\tt x5}--{\tt x7}). +The stack is kept aligned on a four-byte boundary. diff --git a/src/cfgstr.tex b/src/cfgstr.tex new file mode 100644 index 0000000..0a2520a --- /dev/null +++ b/src/cfgstr.tex @@ -0,0 +1,55 @@ +\chapter{Machine Configuration Description} +\label{cfgstr} + +RISC-V platforms may contain myriad devices, processor cores, and +configuration parameters. 
To support higher-level software, including +bootloaders and operating systems, it is recommended that hardware +platforms embed a description of their components in read-only memory +that is directly accessible after processor reset for use by low-level +system software, external debuggers, or manufacturing test procedures. +We call this low-level embedded information a configuration +description. We define here a standard mechanism to encode and locate +the configuration information, and to determine the format of the +configuration information. + +\section{Configuration String Search Procedure} + +The platform must describe how to locate a pointer to find this +string, for example, by specifying a fixed physical address at which +the pointer resides. To support a wide variety of platforms, +configuration formats, and chips with manufacturing-time programming +of configuration options, a flexible search procedure is defined to +locate the configuration information seeded by the initial pointer +specified by the platform. + +The configuration string pointer provided by the platform points to an +initial memory address at which the search for the configuration string +begins. + +The configuration string cannot begin with a padding byte, where a +padding byte is defined to contain either {\tt 0x0} or {\tt 0xff}, but +can be preceded by up to 63 padding bytes that are ignored. If 64 +padding bytes are encountered, then the search terminates without +finding a config string. + +\begin{commentary} +The padding bytes represent common values returned by unpopulated +memory or bus regions or unprogrammed non-volatile +memory. Configuration strings can therefore include pointers to +regions that are optionally populated or programmed, and these regions +will be ignored if there is nothing present. The padding bytes also +support alignment of binary data structures. +\end{commentary} + +Otherwise the first non-padding byte is the beginning of the +configuration information.
For example, configuration information in +Device Tree String format would begin with a ``/dts-v1/''. +Configuration information in Flattened Device Tree format would begin +with the magic number {\tt 0xd00dfeed}. Configuration information in +the config string format would begin with ``/cs-v1/''. +\begin{commentary} + Config string is a new format that is backwards-compatible with + device tree string (as far as DTS specs exist) but can include + additional configuration information in other memory regions. +\end{commentary} + diff --git a/src/d.tex b/src/d.tex new file mode 100644 index 0000000..8a48137 --- /dev/null +++ b/src/d.tex @@ -0,0 +1,353 @@ +\chapter{``D'' Standard Extension for Double-Precision Floating-Point, +Version 2.0} + +This chapter describes the standard double-precision floating-point +instruction-set extension, which is named ``D'' and adds +double-precision floating-point computational instructions compliant +with the IEEE 754-2008 arithmetic standard. The D extension depends on +the base single-precision instruction subset F. + +\section{D Register State} + +The D extension widens the 32 floating-point registers, {\tt f0}--{\tt +f31}, to 64 bits (FLEN=64 in Figure~\ref{fprs}). + +\section{Double-Precision Load and Store Instructions} + +The FLD instruction loads a double-precision floating-point value from +memory into floating-point register {\em rd}. FSD stores a double-precision +value from the floating-point registers to memory. 
+\vspace{-0.2in} +\begin{center} +\begin{tabular}{M@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{width} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +offset[11:0] & base & D & dest & LOAD-FP \\ +\end{tabular} +\end{center} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{O@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:5]} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{width} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{opcode} \\ +\hline +7 & 5 & 5 & 3 & 5 & 7 \\ +offset[11:5] & src & base & D & offset[4:0] & STORE-FP \\ +\end{tabular} +\end{center} + +If a floating-point register holds a single-precision value, it is +guaranteed that a FSD of that register will place a value into memory +that when reloaded with a FLD will recreate the original +single-precision value in a register. The data format that is +stored in memory is undefined beyond having this property. + +\begin{commentary} +User-level code might not know the current type of data stored in a +floating-point register but has to be able to save and restore the +register values. A common case is for callee-save registers, but this +is also essential to implement varargs and user-level threading +libraries. +\end{commentary} + +FLD and FSD are only guaranteed to execute atomically if the effective address +is naturally aligned and XLEN$\geq$64. 
+ +\section{Double-Precision Floating-Point Computational Instructions} + +The double-precision floating-point computational instructions are +defined analogously to their single-precision counterparts, but operate on +double-precision operands and produce double-precision results. +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FADD/FSUB & D & src2 & src1 & RM & dest & OP-FP \\ +FMUL/FDIV & D & src2 & src1 & RM & dest & OP-FP \\ +FMIN-MAX & D & src2 & src1 & MIN/MAX & dest & OP-FP \\ +FSQRT & D & 0 & src & RM & dest & OP-FP \\ +\end{tabular} +\end{center} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{rs3} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +src3 & D & src2 & src1 & RM & dest & F[N]MADD/F[N]MSUB \\ +\end{tabular} +\end{center} + +\section{Double-Precision Floating-Point Conversion and Move Instructions} + +Floating-point-to-integer and integer-to-floating-point conversion +instructions are encoded in the OP-FP major opcode space. 
+FCVT.W.D or FCVT.L.D converts a double-precision floating-point number +in floating-point register {\em rs1} to a signed 32-bit or 64-bit +integer, respectively, in integer register {\em rd}. FCVT.D.W +or FCVT.D.L converts a 32-bit or 64-bit signed integer, +respectively, in integer register {\em rs1} into a +double-precision floating-point +number in floating-point register {\em rd}. FCVT.WU.D, +FCVT.LU.D, FCVT.D.WU, and FCVT.D.LU variants +convert to or from unsigned integer values. FCVT.L[U].D and +FCVT.D.L[U] are illegal in RV32. +The range of valid inputs for FCVT.{\em int}.D and +the behavior for invalid inputs are the same as for FCVT.{\em int}.S. + +All floating-point to integer and integer to floating-point conversion +instructions round according to the {\em rm} field. Note FCVT.D.W[U] always +produces an exact result and is unaffected by rounding mode. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCVT.{\em int}.{\em fmt} & D & W[U]/L[U] & src & RM & dest & OP-FP \\ +FCVT.{\em fmt}.{\em int} & D & W[U]/L[U] & src & RM & dest & OP-FP \\ +\end{tabular} +\end{center} + +The double-precision to single-precision and single-precision to +double-precision conversion instructions, FCVT.S.D and FCVT.D.S, are +encoded in the OP-FP major opcode space and both the source and +destination are floating-point registers. The {\em rs2} field +encodes the datatype of the source, and the {\em fmt} field encodes +the datatype of the destination. FCVT.S.D rounds according to the +RM field; FCVT.D.S will never round. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCVT.{\em fmt}.{\em fmt} & S & D & src & RM & dest & OP-FP \\ +FCVT.{\em fmt}.{\em fmt} & D & S & src & RM & dest & OP-FP \\ +\end{tabular} +\end{center} + +Floating-point to floating-point sign-injection instructions, FSGNJ.D, +FSGNJN.D, and FSGNJX.D, are defined analogously to the single-precision +sign-injection instructions. + +For FSGNJ.D, if {\em rs1} and {\em rs2} are the same register, which contains +a single-precision floating-point value, the single-precision value will be +correctly copied to {\em rd}. If {\em rs1} and {\em rs2} are not the same, +the result is undefined. For FSGNJN.D and FSGNJX.D, the result is undefined +for any single-precision inputs. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FSGNJ & D & src2 & src1 & J[N]/JX & dest & OP-FP \\ +\end{tabular} +\end{center} + +For RV64 only, instructions are provided to move bit patterns between +the floating-point and integer registers. 
FMV.X.D moves the +double-precision value in floating-point register {\em rs1} to a +representation in IEEE 754-2008 standard encoding in integer register +{\em rd}. If the last value written to the source floating-point +register was a single-precision floating-point value, then the value +returned by FMV.X.D is undefined beyond having the property that +moving the value back to a floating-point register will recreate the +original single-precision value. FMV.D.X moves the double-precision +value encoded in IEEE 754-2008 standard encoding from the integer +register {\em rs1} to the floating-point register {\em rd}. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FMV.X.{\em fmt} & D & 0 & src & 000 & dest & OP-FP \\ +FMV.{\em fmt}.X & D & 0 & src & 000 & dest & OP-FP \\ +\end{tabular} +\end{center} + +\section{Double-Precision Floating-Point Compare Instructions} + +The double-precision floating-point compare instructions are +defined analogously to their single-precision counterparts, but operate on +double-precision operands. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{S@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCMP & D & src2 & src1 & EQ/LT/LE & dest & OP-FP \\ +\end{tabular} +\end{center} + +\section{Double-Precision Floating-Point Classify Instruction} + +The double-precision floating-point classify instruction, FCLASS.D, is +defined analogously to its single-precision counterpart, but operates on +double-precision operands. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{S@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCLASS & D & 0 & src & 001 & dest & OP-FP \\ +\end{tabular} +\end{center} diff --git a/src/extensions.tex b/src/extensions.tex new file mode 100644 index 0000000..4346422 --- /dev/null +++ b/src/extensions.tex @@ -0,0 +1,381 @@ +\chapter{Extending RISC-V} +\label{extensions} + +In addition to supporting standard general-purpose software +development, another goal of RISC-V is to provide a basis for more +specialized instruction-set extensions or more customized +accelerators. 
The instruction encoding spaces and optional +variable-length instruction encoding are designed to make it easier to +leverage software development effort for the standard ISA toolchain +when building more customized processors. For example, the intent is +to continue to provide full software support for implementations that +only use the standard I base, perhaps together with many non-standard +instruction-set extensions. + +This chapter describes various ways in which the base RISC-V ISA can +be extended, together with the scheme for managing instruction-set +extensions developed by independent groups. This volume only deals +with the user-level ISA, although the same approach and terminology are +used for supervisor-level extensions described in the second volume. + +\section{Extension Terminology} + +This section defines some standard terminology for describing RISC-V +extensions. +\vspace{-0.2in} +\subsection*{Standard versus Non-Standard Extension} + +Any RISC-V processor implementation must support a base integer ISA +(RV32I or RV64I). In addition, an implementation may support one or +more extensions. We divide extensions into two broad categories: {\em + standard} versus {\em non-standard}. +\begin{itemize} +\item A standard extension is one that is generally useful and that is + designed to not conflict with any other standard extension. + Currently, ``MAFDQLCBTPV'', described in other chapters of this + manual, are either complete or planned standard extensions. +\item A non-standard extension may be highly specialized and may + conflict with other standard or non-standard extensions. We + anticipate a wide variety of non-standard extensions will be + developed over time, with some eventually being promoted to standard + extensions. +\end{itemize} + +\vspace{-0.2in} +\subsection*{Instruction Encoding Spaces and Prefixes} + +An instruction encoding space is some number of instruction bits +within which a base ISA or ISA extension is encoded. 
RISC-V supports +varying instruction lengths, but even within a single instruction +length, there are various sizes of encoding space available. For +example, the base ISA is defined within a 30-bit encoding space (bits +31--2 of the 32-bit instruction), while the atomic extension ``A'' +fits within a 25-bit encoding space (bits 31--7). + +We use the term {\em prefix} to refer to the bits to the {\em right} +of an instruction encoding space (since RISC-V is little-endian, the +bits to the right are stored at earlier memory addresses, hence form a +prefix in instruction-fetch order). The prefix for the standard base +ISA encoding is the two-bit ``11'' field held in bits 1--0 of the +32-bit word, while the prefix for the standard atomic extension ``A'' +is the seven-bit ``0101111'' field held in bits 6--0 of the 32-bit +word representing the AMO major opcode. A quirk of the encoding +format is that the 3-bit funct3 field used to encode a minor opcode is +not contiguous with the major opcode bits in the 32-bit instruction +format, but is considered part of the prefix for 22-bit instruction +spaces. + +Although an instruction encoding space could be of any size, adopting +a smaller set of common sizes simplifies packing independently +developed extensions into a single global encoding. +Table~\ref{encodingspaces} gives the suggested sizes for RISC-V. 
+ +\begin{table}[H] +\begin{center} +\begin{tabular}{|c|l|r|r|r|r|} +\hline +\multicolumn{1}{|c|}{Size} & \multicolumn{1}{|c|}{Usage} & +\multicolumn{4}{|c|}{\# Available in standard instruction length} \\ \cline{3-6} + & & +\multicolumn{1}{|c|}{16-bit} & +\multicolumn{1}{|c|}{32-bit} & +\multicolumn{1}{|c|}{48-bit} & +\multicolumn{1}{|c|}{64-bit} \\ \hline \hline +14-bit & Quadrant of compressed 16-bit encoding & 3 & & & \\ \hline \hline +22-bit & Minor opcode in base 32-bit encoding & & $2^{8}$ & $2^{20}$ & $2^{35}$ \\ \hline +25-bit & Major opcode in base 32-bit encoding & & 32 & $2^{17}$ & $2^{32}$ \\ \hline +30-bit & Quadrant of base 32-bit encoding & & 1 & $2^{12}$ & $2^{27}$ \\ \hline \hline +32-bit & Minor opcode in 48-bit encoding & & & $2^{10}$ & $2^{25}$ \\ \hline +37-bit & Major opcode in 48-bit encoding & & & 32 & $2^{20}$ \\ \hline +40-bit & Quadrant of 48-bit encoding & & & 4 & $2^{17}$ \\ \hline \hline +45-bit & Sub-minor opcode in 64-bit encoding & & & & $2^{12}$ \\ \hline +48-bit & Minor opcode in 64-bit encoding & & & & $2^{9}$ \\ \hline +52-bit & Major opcode in 64-bit encoding & & & & 32\\ \hline +\end{tabular} +\end{center} +\caption{Suggested standard RISC-V instruction encoding space sizes.} +\label{encodingspaces} +\end{table} + +\vspace{-0.2in} +\subsection*{Greenfield versus Brownfield Extensions} + +We use the term {\em greenfield extension} to describe an extension +that begins populating a new instruction encoding space, and hence can +only cause encoding conflicts at the prefix level. We use the term +{\em brownfield extension} to describe an extension that fits around +existing encodings in a previously defined instruction space. A +brownfield extension is necessarily tied to a particular greenfield +parent encoding, and there may be multiple brownfield extensions to +the same greenfield parent encoding. 
For example, the base ISAs are +greenfield encodings of a 30-bit instruction space, while the FDQ +floating-point extensions are all brownfield extensions adding to the +parent base ISA 30-bit encoding space. + +Note that we consider the standard A extension to have a greenfield +encoding as it defines a new previously empty 25-bit encoding space in +the leftmost bits of the full 32-bit base instruction encoding, even +though its standard prefix locates it within the 30-bit encoding space +of the base ISA. Changing only its single 7-bit prefix could move the +A extension to a different 30-bit encoding space while only worrying +about conflicts at the prefix level, not within the encoding space +itself. + +\begin{table}[H] +{ +\begin{center} +\begin{tabular}{|r|c|c|} +\hline + & Adds state & No new state \\ \hline +Greenfield & RV32I(30), RV64I(30) & A(25) \\\hline +Brownfield & F(I), D(F), Q(D) & M(I) \\ +\hline +\end{tabular} +\end{center} +} +\caption{Two-dimensional characterization of standard instruction-set + extensions.} +\label{exttax} +\end{table} + +Table~\ref{exttax} shows the bases and standard extensions placed in a +simple two-dimensional taxonomy. One axis is whether the extension is +greenfield or brownfield, while the other axis is whether the +extension adds architectural state. For greenfield extensions, the +size of the instruction encoding space is given in parentheses. For +brownfield extensions, the name of the extension (greenfield or +brownfield) it builds upon is given in parentheses. Additional +user-level architectural state usually implies changes to the +supervisor-level system or possibly to the standard calling +convention. + +Note that RV64I is not considered an extension of RV32I, but a +different complete base encoding. 
+ +\vspace{-0.2in} +\subsection*{Standard-Compatible Global Encodings} + +A complete or {\em global} encoding of an ISA for an actual RISC-V +implementation must allocate a unique non-conflicting prefix for every +included instruction encoding space. The bases and every standard +extension have each had a standard prefix allocated to ensure they can +all coexist in a global encoding. + +A {\em standard-compatible} global encoding is one where the base and +every included standard extension have their standard prefixes. A +standard-compatible global encoding can include non-standard +extensions that do not conflict with the included standard extensions. +A standard-compatible global encoding can also use standard prefixes +for non-standard extensions if the associated standard extensions are +not included in the global encoding. In other words, a standard +extension must use its standard prefix if included in a +standard-compatible global encoding, but otherwise its prefix is free +to be reallocated. These constraints allow a common toolchain to +target the standard subset of any RISC-V standard-compatible global +encoding. + +\vspace{-0.2in} +\subsection*{Guaranteed Non-Standard Encoding Space} + +To support development of proprietary custom extensions, portions of +the encoding space are guaranteed to never be used by standard +extensions. + +\section{RISC-V Extension Design Philosophy} + +We intend to support a large number of independently developed +extensions by encouraging extension developers to operate within +instruction encoding spaces, and by providing tools to pack these into +a standard-compatible global encoding by allocating unique prefixes. +Some extensions are more naturally implemented as brownfield +augmentations of existing extensions, and will share whatever prefix +is allocated to their parent greenfield extension. 
The standard +extension prefixes avoid spurious incompatibilities in the encoding of +core functionality, while allowing custom packing of more esoteric +extensions. + +This capability of repacking RISC-V extensions into different +standard-compatible global encodings can be used in a number of ways. + +One use-case is developing highly specialized custom accelerators, +designed to run kernels from important application domains. These +might want to drop all but the base integer ISA and add in only the +extensions that are required for the task in hand. The base ISA has +been designed to place minimal requirements on a hardware +implementation, and has been encoded to use only a small fraction of a +32-bit instruction encoding space. + +Another use-case is to build a research prototype for a new type of +instruction-set extension. The researchers might not want to expend +the effort to implement a variable-length instruction-fetch unit, and +so would like to prototype their extension using a simple 32-bit +fixed-width instruction encoding. However, this new extension might +be too large to coexist with standard extensions in the 32-bit space. +If the research experiments do not need all of the standard +extensions, a standard-compatible global encoding might drop the +unused standard extensions and reuse their prefixes to place the +proposed extension in a non-standard location to simplify engineering +of the research prototype. Standard tools will still be able to +target the base and any standard extensions that are present to reduce +development time. Once the instruction-set extension has been +evaluated and refined, it could then be made available for packing +into a larger variable-length encoding space to avoid conflicts with +all standard extensions. + +The following sections describe increasingly sophisticated strategies +for developing implementations with new instruction-set extensions. 
+These are mostly intended for use in highly customized, educational, +or experimental architectures rather than for the main line of RISC-V +ISA development. + +\section{Extensions within fixed-width 32-bit instruction format} +\label{fix32b} + +In this section, we discuss adding extensions to implementations that +only support the base fixed-width 32-bit instruction format. + +\begin{commentary} +We anticipate the simplest fixed-width 32-bit encoding will be popular for +many restricted accelerators and research prototypes. +\end{commentary} + +\subsection*{Available 30-bit instruction encoding spaces} + +In the standard encoding, three of the available 30-bit instruction +encoding spaces (those with 2-bit prefixes 00, 01, and 10) are used to +enable the optional compressed instruction extension. However, if the +compressed instruction-set extension is not required, then these three +further 30-bit encoding spaces become available. This quadruples the +available encoding space within the 32-bit format. + +\subsection*{Available 25-bit instruction encoding spaces} + +A 25-bit instruction encoding space corresponds to a major opcode in +the base and standard extension encodings. + +There are four major opcodes expressly reserved for custom extensions +(Table~\ref{opcodemap}), each of which represents a 25-bit encoding +space. Two of these are reserved for eventual use in the RV128 base +encoding (will be OP-IMM-64 and OP-64), but can be used for standard +or non-standard extensions for RV32 and RV64. + +The two opcodes reserved for RV64 (OP-IMM-32 and OP-32) can also be +used for standard and non-standard extensions to RV32 only. + +If an implementation does not require floating-point, then the seven +major opcodes reserved for standard floating-point extensions +(LOAD-FP, STORE-FP, MADD, MSUB, NMSUB, NMADD, OP-FP) can be reused for +non-standard extensions. Similarly, the AMO major opcode can be +reused if the standard atomic extensions are not required. 
+ +If an implementation does not require instructions longer than +32 bits, then an additional four major opcodes are available (those +marked in gray in Table~\ref{opcodemap}). + +The base RV32I encoding uses only 11 major opcodes plus 3 reserved +opcodes, leaving up to 18 available for extensions. The base RV64I +encoding uses only 13 major opcodes plus 3 reserved opcodes, leaving +up to 16 available for extensions. + +\subsection*{Available 22-bit instruction encoding spaces} + +A 22-bit encoding space corresponds to a funct3 minor opcode space in +the base and standard extension encodings. Several major opcodes have +a funct3 field minor opcode that is not completely occupied, leaving +available several 22-bit encoding spaces. + +Usually a major opcode selects the format used to encode operands in +the remaining bits of the instruction, and ideally, an extension +should follow the operand format of the major opcode to simplify +hardware decoding. + +\subsection*{Other spaces} + +Smaller spaces are available under certain major opcodes, and not all +minor opcodes are entirely filled. + +\section{Adding aligned 64-bit instruction extensions} + +The simplest approach to provide space for extensions that are too +large for the base 32-bit fixed-width instruction format is to add +naturally aligned 64-bit instructions. The implementation must still +support the 32-bit base instruction format, but can require that +64-bit instructions are aligned on 64-bit boundaries to simplify +instruction fetch, with a 32-bit NOP instruction used as alignment +padding where necessary. + +To simplify use of standard tools, the 64-bit instructions should be +encoded as described in Figure~\ref{instlengthcode}. However, an +implementation might choose a non-standard instruction-length encoding +for 64-bit instructions, while retaining the standard encoding for +32-bit instructions. 
For example, if compressed instructions are not +required, then a 64-bit instruction could be encoded using one or more +zero bits in the first two bits of an instruction. + +\begin{commentary} +We anticipate processor generators that produce instruction-fetch +units capable of automatically handling any combination of supported +variable-length instruction encodings. +\end{commentary} + +\section{Supporting VLIW encodings} + +Although RISC-V was not designed as a base for a pure VLIW machine, +VLIW encodings can be added as extensions using several alternative +approaches. In all cases, the base 32-bit encoding has to be supported +to allow use of any standard software tools. + +\subsection*{Fixed-size instruction group} + +The simplest approach is to define a single large naturally aligned +instruction format (e.g., 128 bits) within which VLIW operations are +encoded. In a conventional VLIW, this approach would tend to waste +instruction memory to hold NOPs, but a RISC-V-compatible +implementation would have to also support the base 32-bit +instructions, confining the VLIW code size expansion to +VLIW-accelerated functions. + +\subsection*{Encoded-Length Groups} + +Another approach is to use the standard length encoding from +Figure~\ref{instlengthcode} to encode parallel instruction groups, +allowing NOPs to be compressed out of the VLIW instruction. For +example, a 64-bit instruction could hold two 28-bit operations, while +a 96-bit instruction could hold three 28-bit operations, and so on. +Alternatively, a 48-bit instruction could hold one 42-bit operation, +while a 96-bit instruction could hold two 42-bit operations, and so +on. + +This approach has the advantage of retaining the base ISA encoding for +instructions holding a single operation, but has the disadvantage of +requiring a new 28-bit or 42-bit encoding for operations within the +VLIW instructions, and misaligned instruction fetch for larger groups. 
+One simplification is to not allow VLIW instructions to straddle +certain microarchitecturally significant boundaries (e.g., cache lines +or virtual memory pages). + +\subsection*{Fixed-Size Instruction Bundles} + +Another approach, similar to Itanium, is to use a larger naturally +aligned fixed instruction bundle size (e.g., 128 bits) across which +parallel operation groups are encoded. This simplifies instruction +fetch, but shifts the complexity to the group execution engine. To +remain RISC-V compatible, the base 32-bit instruction would still have +to be supported. + +\subsection*{End-of-Group bits in Prefix} + +None of the above approaches retains the RISC-V encoding for the +individual operations within a VLIW instruction. Yet another approach +is to repurpose the two prefix bits in the fixed-width 32-bit +encoding. One prefix bit can be used to signal ``end-of-group'' if +set, while the second bit could indicate execution under a predicate +if clear. Standard RISC-V 32-bit instructions generated by tools +unaware of the VLIW extension would have both prefix bits set (11) and +thus have the correct semantics, with each instruction at the end of a +group and not predicated. + +The main disadvantage of this approach is that the base ISA lacks the +complex predication support usually required in an aggressive VLIW +system, and it is difficult to add space to specify more predicate +registers in the standard 30-bit encoding space. diff --git a/src/f.tex b/src/f.tex new file mode 100644 index 0000000..94c0920 --- /dev/null +++ b/src/f.tex @@ -0,0 +1,742 @@ +\chapter{``F'' Standard Extension for Single-Precision Floating-Point, +Version 2.0} +\label{sec:single-float} + +This chapter describes the standard instruction-set extension for +single-precision floating-point, which is named ``F'' and adds +single-precision floating-point computational instructions compliant +with the IEEE 754-2008 arithmetic standard~\cite{ieee754-2008}. 
+ + +\section{F Register State} + +The F extension adds 32 floating-point registers, {\tt f0}--{\tt f31}, +each 32 bits wide, and a floating-point control and status register +{\tt fcsr}, which contains the operating mode and exception status of the +floating-point unit. This additional state is shown in +Figure~\ref{fprs}. We use the term FLEN to describe the width of the +floating-point registers in the RISC-V ISA, and FLEN=32 for the F +single-precision floating-point extension. Most floating-point +instructions operate on values in the floating-point register file. +Floating-point load and store instructions transfer floating-point +values between registers and memory. Instructions to transfer values +to and from the integer register file are also provided. + +\begin{figure}[htbp] +{\footnotesize +\begin{center} +\begin{tabular}{p{2in}} +\instbitrange{FLEN-1}{0} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f0\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f1\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f2\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f3\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f4\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f5\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f6\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f7\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f8\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f9\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f10\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f11\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f12\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f13\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f14\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f15\ \ \ \ \ }} \\ 
\cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f16\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f17\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f18\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f19\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f20\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f21\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f22\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f23\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f24\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f25\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f26\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f27\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f28\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f29\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f30\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f31\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{c}{FLEN} \\ + +\instbitrange{31}{0} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{fcsr}} \\ \cline{1-1} +\multicolumn{1}{c}{32} \\ +\end{tabular} +\end{center} +} +\caption{RISC-V standard F extension single-precision floating-point state.} +\label{fprs} +\end{figure} + +\begin{commentary} +We considered a unified register file for both integer and +floating-point values as this simplifies software register allocation +and calling conventions, and reduces total user state. However, a +split organization increases the total number of registers accessible +with a given instruction width, simplifies provision of enough regfile +ports for wide superscalar issue, supports decoupled +floating-point-unit architectures, and simplifies use of internal +floating-point encoding techniques. 
Compiler support and calling +conventions for split register file architectures are well understood, +and using dirty bits on floating-point register file state can reduce +context-switch overhead. +\end{commentary} + +\clearpage + +\section{Floating-Point Control and Status Register} + +The floating-point control and status register, {\tt fcsr}, is a RISC-V +control and status register (CSR). It is a 32-bit read/write register that +selects the dynamic rounding mode for floating-point arithmetic operations and +holds the accrued exception flags, as shown in Figure~\ref{fcsr}. + +\begin{figure*} +{\footnotesize +\begin{center} +\begin{tabular}{K@{}E@{}ccccc} +\instbitrange{31}{8} & +\instbitrange{7}{5} & +\instbit{4} & +\instbit{3} & +\instbit{2} & +\instbit{1} & +\instbit{0} \\ +\hline +\multicolumn{1}{|c|}{0} & +\multicolumn{1}{c|}{Rounding Mode ({\tt frm})} & +\multicolumn{5}{c|}{Accrued Exceptions ({\tt fflags})} \\ +\hline +\multicolumn{1}{c}{} & +\multicolumn{1}{c|}{} & +\multicolumn{1}{c|}{NV} & +\multicolumn{1}{c|}{DZ} & +\multicolumn{1}{c|}{OF} & +\multicolumn{1}{c|}{UF} & +\multicolumn{1}{c|}{NX} \\ +\cline{3-7} +24 & 3 & 1 & 1 & 1 & 1 & 1 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Floating-point control and status register.} +\label{fcsr} +\end{figure*} + +The {\tt fcsr} register can be read and written with the FRCSR and +FSCSR instructions, which are assembler pseudo-ops built on the +underlying CSR access instructions. FRCSR reads {\tt fcsr} by copying +it into integer register {\em rd}. FSCSR swaps the value in {\tt + fcsr} by copying the original value into integer register {\em rd}, +and then writing a new value obtained from integer register {\em rs1} +into {\tt fcsr}. + +The fields within the {\tt fcsr} can also be accessed individually +through different CSR addresses, and separate assembler pseudo-ops are +defined for these accesses. 
The FRRM instruction reads the Rounding +Mode field {\tt frm} and copies it into the least-significant three +bits of integer register {\em rd}, with zero in all other bits. FSRM +swaps the value in {\tt frm} by copying the original value into +integer register {\em rd}, and then writing a new value obtained from +the three least-significant bits of integer register {\em rs1} into +{\tt frm}. FRFLAGS and FSFLAGS are defined analogously for the +Accrued Exception Flags field {\tt fflags}. Additional +pseudo-instructions FSRMI and FSFLAGSI swap values using an immediate +value instead of register {\em rs1}. + +Floating-point operations use either a static rounding mode encoded in the +instruction, or a dynamic rounding mode held in {\tt frm}. Rounding modes are +encoded as shown in Table~\ref{rm}. A value of 111 in the instruction's {\em +rm} field selects the dynamic rounding mode held in {\tt frm}. If {\tt frm} +is set to an invalid value (101--111), any subsequent attempt to execute +a floating-point operation with a dynamic rounding mode will cause an illegal +instruction trap. Some instructions that have the {\em rm} field are +nevertheless unaffected by the rounding mode; they should have their {\em rm} +field set to RNE (000). + +\begin{commentary} +The C99 language standard effectively mandates the provision of a +dynamic rounding mode register. 
+\end{commentary} +\newpage + +\begin{table}[htp] +\begin{small} +\begin{center} +\begin{tabular}{ccl} +\hline +\multicolumn{1}{|c|}{Rounding Mode} & +\multicolumn{1}{c|}{Mnemonic} & +\multicolumn{1}{c|}{Meaning} \\ +\hline +\multicolumn{1}{|c|}{000} & +\multicolumn{1}{l|}{RNE} & +\multicolumn{1}{l|}{Round to Nearest, ties to Even}\\ +\hline +\multicolumn{1}{|c|}{001} & +\multicolumn{1}{l|}{RTZ} & +\multicolumn{1}{l|}{Round towards Zero}\\ +\hline +\multicolumn{1}{|c|}{010} & +\multicolumn{1}{l|}{RDN} & +\multicolumn{1}{l|}{Round Down (towards $-\infty$)}\\ +\hline +\multicolumn{1}{|c|}{011} & +\multicolumn{1}{l|}{RUP} & +\multicolumn{1}{l|}{Round Up (towards $+\infty$)}\\ +\hline +\multicolumn{1}{|c|}{100} & +\multicolumn{1}{l|}{RMM} & +\multicolumn{1}{l|}{Round to Nearest, ties to Max Magnitude}\\ +\hline +\multicolumn{1}{|c|}{101} & +\multicolumn{1}{l|}{} & +\multicolumn{1}{l|}{\em Invalid. Reserved for future use.}\\ +\hline +\multicolumn{1}{|c|}{110} & +\multicolumn{1}{l|}{} & +\multicolumn{1}{l|}{\em Invalid. Reserved for future use.}\\ +\hline +\multicolumn{1}{|c|}{111} & +\multicolumn{1}{l|}{} & +\multicolumn{1}{l|}{In instruction's {\em rm} field, selects dynamic rounding mode;}\\ +\multicolumn{1}{|c|}{} & +\multicolumn{1}{l|}{} & +\multicolumn{1}{l|}{In Rounding Mode register, {\em Invalid}.}\\ +\hline +\end{tabular} +\end{center} +\end{small} +\caption{Rounding mode encoding.} +\label{rm} +\end{table} + +The accrued exception flags indicate the exception conditions that +have arisen on any floating-point arithmetic instruction since the +field was last reset by software, as shown in Table~\ref{bitdef}. 
+ +\begin{table}[htp] +\begin{small} +\begin{center} +\begin{tabular}{cl} +\hline +\multicolumn{1}{|c|}{Flag Mnemonic} & +\multicolumn{1}{c|}{Flag Meaning} \\ +\hline +\multicolumn{1}{|c|}{NV} & +\multicolumn{1}{c|}{Invalid Operation}\\ +\hline +\multicolumn{1}{|c|}{DZ} & +\multicolumn{1}{c|}{Divide by Zero}\\ +\hline +\multicolumn{1}{|c|}{OF} & +\multicolumn{1}{c|}{Overflow}\\ +\hline +\multicolumn{1}{|c|}{UF} & +\multicolumn{1}{c|}{Underflow}\\ +\hline +\multicolumn{1}{|c|}{NX} & +\multicolumn{1}{c|}{Inexact}\\ +\hline +\end{tabular} +\end{center} +\end{small} +\caption{Accrued exception flag encoding.} +\label{bitdef} +\end{table} + +\begin{commentary} +As allowed by the standard, we do not support traps on floating-point +exceptions in the base ISA, but instead require explicit checks of the flags +in software. We considered adding branches controlled directly by the +contents of the floating-point accrued exception flags, but ultimately chose +to omit these instructions to keep the ISA simple. +\end{commentary} + +\section{NaN Generation and Propagation} + +Except when otherwise stated, if the result of a floating-point operation is +NaN, it is the canonical NaN. The canonical NaN has a positive sign and all +significand bits clear except the MSB, a.k.a. the quiet bit. For +single-precision floating-point, this corresponds to the pattern {\tt +0x7fc00000}. + +For FMIN and FMAX, if at least one input is a signaling NaN, or if both inputs +are quiet NaNs, the result is the canonical NaN. If one operand is a quiet NaN +and the other is not a NaN, the result is the non-NaN operand. + +The sign-injection instructions (FSGNJ, FSGNJN, FSGNJX) do not canonicalize +NaNs; they manipulate the underlying bit patterns directly. + +\begin{commentary} +We considered propagating NaN payloads, as is recommended by the standard, +but this decision would have increased hardware cost. Moreover, since this +feature is optional in the standard, it cannot be used in portable code. 
+ +Implementors are free to provide a NaN payload propagation scheme as +a nonstandard extension enabled by a nonstandard operating mode. However, the +canonical NaN scheme described above must always be supported and should be +the default mode. +\end{commentary} + +\begin{commentary} +We require implementations to return the standard-mandated default +values in the case of exceptional conditions, without any further +intervention on the part of user-level software (unlike the Alpha ISA +floating-point trap barriers). We believe full hardware handling of +exceptional cases will become more common, and so wish to avoid +complicating the user-level ISA to optimize other approaches. +Implementations can always trap to machine-mode software handlers to +provide exceptional default values. +\end{commentary} + +\section{Subnormal Arithmetic} + +Operations on subnormal numbers are handled in accordance with the IEEE +754-2008 standard. + +In the parlance of the IEEE standard, tininess is detected after +rounding---that is, the underflow exception is raised only if the rounded +result is subnormal, even if the unrounded result would have been subnormal. + +\begin{commentary} +Detecting tininess after rounding results in fewer spurious underflow signals. +\end{commentary} + +\section{Single-Precision Load and Store Instructions} + +Floating-point loads and stores use the same base+offset addressing +mode as the integer base ISA, with a base address in register {\em + rs1} and a 12-bit signed byte offset. The FLW instruction loads a +single-precision floating-point value from memory into floating-point +register {\em rd}. FSW stores a single-precision value from +floating-point register {\em rs2} to memory. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{M@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{width} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +offset[11:0] & base & W & dest & LOAD-FP \\ +\end{tabular} +\end{center} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{O@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:5]} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{width} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{opcode} \\ +\hline +7 & 5 & 5 & 3 & 5 & 7 \\ +offset[11:5] & src & base & W & offset[4:0] & STORE-FP \\ +\end{tabular} +\end{center} + +FLW and FSW are only guaranteed to execute atomically if the effective address +is naturally aligned. + +\section{Single-Precision Floating-Point Computational Instructions} +\label{sec:single-float-compute} + +Floating-point arithmetic instructions with one or two source operands use the +R-type format with the OP-FP major opcode. FADD.S, FSUB.S, +FMUL.S, and FDIV.S perform single-precision floating-point addition, +subtraction, multiplication, and division, respectively, between {\em rs1} and +{\em rs2}, writing the result to {\em rd}. FMIN.S and FMAX.S +write, respectively, the smaller or larger of {\em rs1} and {\em rs2} to {\em +rd}. FSQRT.S computes the square root of {\em rs1} and writes the +result to {\em rd}. + +The 2-bit floating-point format field {\em fmt} is encoded as shown in +Table~\ref{tab:fmt}. It is set to {\em S} (00) for all instructions in +the F extension. 
+ +\begin{table}[htp] +\begin{small} +\begin{center} +\begin{tabular}{|c|c|l|} +\hline +{\em fmt} field & +Mnemonic & +Meaning \\ +\hline +00 & S & 32-bit single-precision \\ +01 & D & 64-bit double-precision \\ +10 & - & {\em reserved} \\ +11 & Q & 128-bit quad-precision \\ +\hline +\end{tabular} +\end{center} +\end{small} +\caption{Format field encoding.} +\label{tab:fmt} +\end{table} + +All floating-point operations that perform rounding can select the +rounding mode using the {\em rm} field with the encoding shown in +Table~\ref{rm}. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FADD/FSUB & S & src2 & src1 & RM & dest & OP-FP \\ +FMUL/FDIV & S & src2 & src1 & RM & dest & OP-FP \\ +FMIN-MAX & S & src2 & src1 & MIN/MAX & dest & OP-FP \\ +FSQRT & S & 0 & src & RM & dest & OP-FP \\ +\end{tabular} +\end{center} + +Floating-point fused multiply-add instructions require a new standard +instruction format. R4-type instructions specify three source +registers ({\em rs1}, {\em rs2}, and {\em rs3}) and a destination +register ({\em rd}). This format is only used by the floating-point +fused multiply-add instructions. Fused multiply-add instructions +multiply the values in {\em rs1} and {\em rs2}, optionally negate the +product, then add or subtract the value in {\em rs3}, writing the final +result to {\em rd}. +FMADD.S computes {\em rs1$\times$rs2+rs3}; FMSUB.S computes +{\em rs1$\times$rs2-rs3}; FNMSUB.S computes {\em + -rs1$\times$rs2+rs3}; and FNMADD.S computes {\em + -rs1$\times$rs2-rs3}. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{rs3} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +src3 & S & src2 & src1 & RM & dest & F[N]MADD/F[N]MSUB \\ +\end{tabular} +\end{center} + +\section{Single-Precision Floating-Point Conversion and Move \mbox{Instructions}} + +Floating-point-to-integer and integer-to-floating-point conversion +instructions are encoded in the OP-FP major opcode space. +FCVT.W.S or FCVT.L.S converts a floating-point number +in floating-point register {\em rs1} to a signed 32-bit or 64-bit +integer, respectively, in integer register {\em rd}. FCVT.S.W +or FCVT.S.L converts a 32-bit or 64-bit signed integer, +respectively, in integer register {\em rs1} into a floating-point +number in floating-point register {\em rd}. FCVT.WU.S, +FCVT.LU.S, FCVT.S.WU, and FCVT.S.LU variants +convert to or from unsigned integer values. FCVT.L[U].S and +FCVT.S.L[U] are illegal in RV32. +If the rounded result is not representable in the destination format, +it is clipped to the nearest value and the invalid flag is set. +Table~\ref{tab:int_conv} gives the range of valid inputs for FCVT.{\em int}.S +and the behavior for invalid inputs. 
+ +\begin{table}[htp] +\begin{small} +\begin{center} +\begin{tabular}{|l|r|r|r|r|} +\hline + & FCVT.W.S & FCVT.WU.S & FCVT.L.S & FCVT.LU.S \\ +\hline +Minimum valid input (after rounding) & $-2^{31}$ & 0 & $-2^{63}$ & 0 \\ +Maximum valid input (after rounding) & $2^{31}-1$ & $2^{32}-1$ & $2^{63}-1$ & $2^{64}-1$ \\ +\hline +Output for out-of-range negative input & $-2^{31}$ & 0 & $-2^{63}$ & 0 \\ +Output for $-\infty$ & $-2^{31}$ & 0 & $-2^{63}$ & 0 \\ +Output for out-of-range positive input & $2^{31}-1$ & $2^{32}-1$ & $2^{63}-1$ & $2^{64}-1$ \\ +Output for $+\infty$ or NaN & $2^{31}-1$ & $2^{32}-1$ & $2^{63}-1$ & $2^{64}-1$ \\ +\hline +\end{tabular} +\end{center} +\end{small} +\caption{Domains of float-to-integer conversions and behavior for invalid inputs.} +\label{tab:int_conv} +\end{table} + +All floating-point to integer and integer to floating-point conversion +instructions round according to the {\em rm} field. A floating-point register +can be initialized to floating-point positive zero using FCVT.S.W {\em rd}, +{\tt x0}, which will never raise any exceptions. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCVT.{\em int}.{\em fmt} & S & W[U]/L[U] & src & RM & dest & OP-FP \\ +FCVT.{\em fmt}.{\em int} & S & W[U]/L[U] & src & RM & dest & OP-FP \\ +\end{tabular} +\end{center} + +Floating-point to floating-point sign-injection instructions, FSGNJ.S, +FSGNJN.S, and FSGNJX.S, produce a result that takes all bits except +the sign bit from {\em rs1}. 
For FSGNJ, the result's sign bit is {\em + rs2}'s sign bit; for FSGNJN, the result's sign bit is the opposite +of {\em rs2}'s sign bit; and for FSGNJX, the sign bit is the XOR of +the sign bits of {\em rs1} and {\em rs2}. Sign-injection instructions +do not set floating-point exception flags. Note, FSGNJ.S {\em rx, ry, + ry} moves {\em ry} to {\em rx} (assembler pseudo-op FMV.S {\em rx, + ry}); FSGNJN.S {\em rx, ry, ry} moves the negation of {\em ry} to +{\em rx} (assembler pseudo-op FNEG.S {\em rx, ry}); and FSGNJX.S {\em rx, + ry, ry} moves the absolute value of {\em ry} to {\em rx} (assembler +pseudo-op FABS.S {\em rx, ry}). + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FSGNJ & S & src2 & src1 & J[N]/JX & dest & OP-FP \\ +\end{tabular} +\end{center} + +\begin{commentary} +The sign-injection instructions +provide floating-point MV, ABS, and NEG, +as well as supporting a few other operations, including the IEEE copySign +operation and sign manipulation in transcendental math function +libraries. Although MV, ABS, and NEG only need a single register +operand, whereas FSGNJ instructions need two, it is unlikely most +microarchitectures would add optimizations to benefit from the reduced +number of register reads for these relatively infrequent instructions. +Even in this case, a microarchitecture can simply detect when both +source registers are the same for FSGNJ instructions and only read a +single copy. 
+\end{commentary} + +Instructions are provided to move bit patterns between the +floating-point and integer registers. FMV.X.S moves the +single-precision value in floating-point register {\em rs1} +represented in IEEE 754-2008 encoding to the lower 32 bits of integer +register {\em rd}. For RV64, the higher 32 bits of the destination +register are filled with copies of the floating-point number's sign +bit. FMV.S.X moves the single-precision value encoded in IEEE +754-2008 standard encoding from the lower 32 bits of integer register +{\em rs1} to the floating-point register {\em rd}. The bits are not +modified in the transfer, and in particular, the payloads of +non-canonical NaNs are preserved. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FMV.X.{\em fmt} & S & 0 & src & 000 & dest & OP-FP \\ +FMV.{\em fmt}.X & S & 0 & src & 000 & dest & OP-FP \\ +\end{tabular} +\end{center} + +\begin{commentary} +The base floating-point ISA was defined so as to allow implementations +to employ an internal recoding of the floating-point format in +registers to simplify handling of subnormal values and possibly to +reduce functional unit latency. To this end, the base ISA avoids +representing integer values in the floating-point registers by +defining conversion and comparison operations that read and write the +integer register file directly. 
This also removes many of the common +cases where explicit moves between integer and floating-point +registers are required, reducing instruction count and critical paths +for common mixed-format code sequences. +\end{commentary} + +\section{Single-Precision Floating-Point Compare Instructions} + +Floating-point compare instructions perform the specified comparison (equal, +less than, or less than or equal) between floating-point registers {\em rs1} +and {\em rs2} and record the Boolean result in integer register {\em rd}. + +FLT.S and FLE.S perform what the IEEE 754-2008 standard refers to as {\em +signaling} comparisons: that is, an Invalid Operation exception is raised if +either input is NaN. FEQ.S performs a {\em quiet} comparison: only signaling +NaN inputs cause an Invalid Operation exception. For all three instructions, +the result is 0 if either operand is NaN. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{S@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCMP & S & src2 & src1 & EQ/LT/LE & dest & OP-FP \\ +\end{tabular} +\end{center} + +\section{Single-Precision Floating-Point Classify Instruction} + +The FCLASS.S instruction examines the value in floating-point register {\em +rs1} and writes to integer register {\em rd} a 10-bit mask that indicates +the class of the floating-point number. The format of the mask is +described in Table~\ref{tab:fclass}. The corresponding bit in {\em rd} will +be set if the property is true and clear otherwise. All other bits in +{\em rd} are cleared. Note that exactly one bit in {\em rd} will be set. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{S@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCLASS & S & 0 & src & 001 & dest & OP-FP \\ +\end{tabular} +\end{center} + +\begin{table}[htp] +\begin{small} +\begin{center} +\begin{tabular}{|c|l|} +\hline +{\em rd} bit & +Meaning \\ +\hline +0 & {\em rs1} is $-\infty$. \\ +1 & {\em rs1} is a negative normal number. \\ +2 & {\em rs1} is a negative subnormal number. \\ +3 & {\em rs1} is $-0$. \\ +4 & {\em rs1} is $+0$. \\ +5 & {\em rs1} is a positive subnormal number. \\ +6 & {\em rs1} is a positive normal number. \\ +7 & {\em rs1} is $+\infty$. \\ +8 & {\em rs1} is a signaling NaN. \\ +9 & {\em rs1} is a quiet NaN. 
\\ +\hline +\end{tabular} +\end{center} +\end{small} +\caption{Format of result of FCLASS instruction.} +\label{tab:fclass} +\end{table} diff --git a/src/figs/PLIC-block-diagram.pdf b/src/figs/PLIC-block-diagram.pdf new file mode 100644 index 0000000..e9141f0 Binary files /dev/null and b/src/figs/PLIC-block-diagram.pdf differ diff --git a/src/figs/PLIC-interrupt-flow.pdf b/src/figs/PLIC-interrupt-flow.pdf new file mode 100644 index 0000000..039c1b8 Binary files /dev/null and b/src/figs/PLIC-interrupt-flow.pdf differ diff --git a/src/figs/halimps.pdf b/src/figs/halimps.pdf new file mode 100644 index 0000000..2adddf3 Binary files /dev/null and b/src/figs/halimps.pdf differ diff --git a/src/figs/halmode.pdf b/src/figs/halmode.pdf new file mode 100644 index 0000000..3f4c99c Binary files /dev/null and b/src/figs/halmode.pdf differ diff --git a/src/figs/privimps.pdf b/src/figs/privimps.pdf new file mode 100644 index 0000000..a5f433c Binary files /dev/null and b/src/figs/privimps.pdf differ diff --git a/src/figs/virtimps.pdf b/src/figs/virtimps.pdf new file mode 100644 index 0000000..2ab2682 Binary files /dev/null and b/src/figs/virtimps.pdf differ diff --git a/src/gmaps.tex b/src/gmaps.tex new file mode 100644 index 0000000..2caaa79 --- /dev/null +++ b/src/gmaps.tex @@ -0,0 +1,75 @@ +\chapter{RV32/64G Instruction Set Listings} + +One goal of the RISC-V project is that it be used as a stable software +development target. For this purpose, we define a combination of a +base ISA (RV32I or RV64I) plus selected standard extensions (IMAFD) as +a ``general-purpose'' ISA, and we use the abbreviation G for the IMAFD +combination of instruction-set extensions. This chapter presents +opcode maps and instruction-set listings for RV32G and RV64G. + +\input{opcode-map} + +Table~\ref{opcodemap} shows a map of the major opcodes for RVG. Major +opcodes with 3 or more lower bits set are reserved for instruction +lengths greater than 32 bits. 
Opcodes marked as {\em reserved} should +be avoided for custom instruction set extensions as they might be used +by future standard extensions. Major opcodes marked as {\em custom-0} +and {\em custom-1} will be avoided by future standard extensions and +are recommended for use by custom instruction-set extensions within +the base 32-bit instruction format. The opcodes marked {\em + custom-2/rv128} and {\em custom-3/rv128} are reserved for future use +by RV128, but will otherwise be avoided for standard extensions and so +can also be used for custom instruction-set extensions in RV32 and +RV64. + +We believe RV32G and RV64G provide simple but complete instruction +sets for a broad range of general-purpose computing. The optional +compressed instruction set described in Chapter~\ref{compressed} can +be added (forming RV32GC and RV64GC) to improve performance, code +size, and energy efficiency, though with some additional hardware +complexity. + +As we move beyond IMAFDC into further instruction set extensions, the +added instructions tend to be more domain-specific and only provide +benefits to a restricted class of applications, e.g., for multimedia +or security. Unlike most commercial ISAs, the RISC-V ISA design +clearly separates the base ISA and broadly applicable standard +extensions from these more specialized additions. +Chapter~\ref{extensions} has a more extensive discussion of ways to +add extensions to the RISC-V ISA. + +\input{instr-table} + +\FloatBarrier +Table~\ref{rvgcsrnames} lists the CSRs that have +currently been allocated CSR addresses. The timers, counters, and +floating-point CSRs are the only CSRs defined in this specification. + +\begin{table}[htb!] +\begin{center} +\begin{tabular}{|l|l|l|l|} +\hline +Number & Privilege & Name & Description \\ +\hline +\multicolumn{4}{|c|}{Floating-Point Control and Status Registers} \\ +\hline +\tt 0x001 & Read/write &\tt fflags & Floating-Point Accrued Exceptions. 
\\ +\tt 0x002 & Read/write &\tt frm & Floating-Point Dynamic Rounding Mode. \\ +\tt 0x003 & Read/write &\tt fcsr & Floating-Point Control and Status +Register ({\tt frm} + {\tt fflags}). \\ +\hline +\multicolumn{4}{|c|}{Counters and Timers} \\ +\hline +\tt 0xC00 & Read-only &\tt cycle & Cycle counter for RDCYCLE instruction. \\ +\tt 0xC01 & Read-only &\tt time & Timer for RDTIME instruction. \\ +\tt 0xC02 & Read-only &\tt instret & Instructions-retired counter for RDINSTRET instruction. \\ +\tt 0xC80 & Read-only &\tt cycleh & Upper 32 bits of {\tt cycle}, RV32I only. \\ +\tt 0xC81 & Read-only &\tt timeh & Upper 32 bits of {\tt time}, RV32I only. \\ +\tt 0xC82 & Read-only &\tt instreth & Upper 32 bits of {\tt instret}, RV32I only. \\ +\hline +\end{tabular} +\end{center} +\caption{RISC-V control and status register (CSR) address map.} +\label{rvgcsrnames} +\end{table} + diff --git a/src/graffles/PLIC-block-diagram.graffle b/src/graffles/PLIC-block-diagram.graffle new file mode 100644 index 0000000..aeca213 Binary files /dev/null and b/src/graffles/PLIC-block-diagram.graffle differ diff --git a/src/graffles/PLIC-interrupt-flow.graffle b/src/graffles/PLIC-interrupt-flow.graffle new file mode 100644 index 0000000..c0d79ac Binary files /dev/null and b/src/graffles/PLIC-interrupt-flow.graffle differ diff --git a/src/graffles/privimps.graffle b/src/graffles/privimps.graffle new file mode 100644 index 0000000..f260ab1 --- /dev/null +++ b/src/graffles/privimps.graffle @@ -0,0 +1,8179 @@ + + + + + ApplicationVersion + + com.omnigroup.OmniGrafflePro + 139.17.0.185490 + + CreationDate + 2011-07-18 06:24:39 +0000 + Creator + Krste Asanovic + GraphDocumentVersion + 8 + GuidesLocked + NO + GuidesVisible + YES + ImageCounter + 1 + LinksVisible + NO + MagnetsVisible + NO + MasterSheets + + ModificationDate + 2014-10-12 23:17:45 +0000 + Modifier + Krste Asanovic + NotesVisible + NO + OriginVisible + NO + PageBreaks + YES + PrintInfo + + NSBottomMargin + + float + 41 + + 
NSHorizonalPagination + + coded + BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG + + NSLeftMargin + + float + 18 + + NSPaperSize + + size + {612, 792} + + NSPrintReverseOrientation + + int + 0 + + NSRightMargin + + float + 18 + + NSTopMargin + + float + 18 + + + ReadOnly + NO + Sheets + + + ActiveLayerIndex + 0 + AutoAdjust + + BackgroundGraphic + + Bounds + {{0, 0}, {576, 733}} + Class + SolidGraphic + ID + 2 + Style + + shadow + + Draws + NO + + stroke + + Draws + NO + + + + BaseZoom + 0 + CanvasOrigin + {0, 0} + ColumnAlign + 1 + ColumnSpacing + 36 + DisplayScale + 1.000 cm = 1.000 cm + GraphicsList + + + Class + Group + Graphics + + + Bounds + {{283.46457958221436, 116.22047567367554}, {266.45669555664062, 17.00787353515625}} + Class + ShapedGraphic + ID + 273 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 HEE} + VerticalPad + 0 + + + + Bounds + {{283.46457481384277, 104.88189268112183}, {266.45669555664062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 274 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 HBI} + VerticalPad + 0 + + + + Bounds + {{419.52756786346436, 76.535435199737549}, 
{130.39370727539062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 275 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 SBI} + VerticalPad + 0 + + + + Class + Group + Graphics + + + Bounds + {{487.55906867980957, 48.188978672027588}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 277 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{487.55906867980957, 28.346457958221436}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 278 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + Bounds + {{419.52756786346436, 59.527561664581299}, {130.39370727539062, 17.00787353515625}} + 
Class + ShapedGraphic + ID + 279 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 OS} + VerticalPad + 0 + + + + Bounds + {{419.5275707244873, 48.188978672027588}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 280 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{419.5275707244873, 28.346457958221436}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 281 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + ID + 276 + + + Class + Group + Graphics + + + Bounds + {{351.49607563018799, 48.18897819519043}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 283 + Magnets + + {0, 1} + {0, -1} + {1, 0} + 
{-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{351.49607563018799, 28.346457481384277}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 284 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + Bounds + {{283.46457481384277, 59.527561187744141}, {130.39370727539062, 17.00787353515625}} + Class + ShapedGraphic + ID + 285 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 OS} + VerticalPad + 0 + + + + Bounds + {{283.46457767486572, 48.18897819519043}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 286 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + 
{\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{283.46457767486572, 28.346457481384277}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 287 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + ID + 282 + + + Bounds + {{283.46457481384277, 87.874018669128418}, {266.45669555664062, 17.00787353515625}} + Class + ShapedGraphic + ID + 288 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Hypervisor} + VerticalPad + 0 + + + + Bounds + {{283.46457481384277, 76.535435676574707}, {130.39370727539062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 289 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} 
+{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 SBI} + VerticalPad + 0 + + + + ID + 272 + + + Class + Group + Graphics + + + Bounds + {{192.75591373443604, 48.188978672027588}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 242 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{192.75591373443604, 28.346457958221436}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 243 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + Bounds + {{124.72441291809082, 87.874019145965576}, {130.39370727539062, 17.00787353515625}} + Class + ShapedGraphic + ID + 244 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} 
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 SEE} + VerticalPad + 0 + + + + Bounds + {{124.72441291809082, 76.535436153411865}, {130.39370727539062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 245 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 SBI} + VerticalPad + 0 + + + + Bounds + {{124.72441291809082, 59.527561664581299}, {130.39370727539062, 17.00787353515625}} + Class + ShapedGraphic + ID + 246 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 OS} + VerticalPad + 0 + + + + Bounds + {{124.72441577911377, 48.188978672027588}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 247 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} 
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{124.72441577911377, 28.346457958221436}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 248 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + ID + 241 + + + Class + Group + Graphics + + + Bounds + {{28.346454620361328, 59.527560710906982}, {65.196853637695312, 17.00787353515625}} + Class + ShapedGraphic + ID + 238 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 AEE} + VerticalPad + 0 + + + + Bounds + {{28.346457481384277, 48.188977718353271}, {65.196853637695312, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 239 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 
ABI} + VerticalPad + 0 + + + + Bounds + {{28.346457481384277, 28.346457004547119}, {65.196853637695312, 19.842521667480469}} + Class + ShapedGraphic + ID + 240 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + ID + 237 + + + GridInfo + + GridSpacing + 2.8346457481384277 + MajorGridSpacing + 10 + ShowsGrid + YES + SnapsToGrid + YES + + HPages + 1 + KeepToScale + + Layers + + + Lock + NO + Name + Layer 1 + Print + YES + View + YES + + + LayoutInfo + + Animate + NO + circoMinDist + 18 + circoSeparation + 0.0 + layoutEngine + dot + neatoSeparation + 0.0 + twopiSeparation + 0.0 + + Orientation + 2 + PrintOnePage + + RowAlign + 1 + RowSpacing + 36 + SheetTitle + privimps + UniqueID + 3 + VPages + 1 + + + ActiveLayerIndex + 0 + AutoAdjust + + BackgroundGraphic + + Bounds + {{0, 0}, {576, 733}} + Class + SolidGraphic + ID + 2 + Style + + shadow + + Draws + NO + + stroke + + Draws + NO + + + + BaseZoom + 0 + CanvasOrigin + {0, 0} + ColumnAlign + 1 + ColumnSpacing + 36 + DisplayScale + 1.000 cm = 1.000 cm + GraphicsList + + + Class + Group + Graphics + + + Bounds + {{283.46457481384277, 133.2283501625061}, {266.45669555664062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 313 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} 
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 HAL} + VerticalPad + 0 + + + + Bounds + {{283.46457481384277, 144.56693410873413}, {266.45669555664062, 17.00787353515625}} + Class + ShapedGraphic + ID + 314 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Hardware} + VerticalPad + 0 + + + + Bounds + {{283.46457958221436, 116.22047567367554}, {266.45669555664062, 17.00787353515625}} + Class + ShapedGraphic + ID + 315 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 HEE} + VerticalPad + 0 + + + + Bounds + {{283.46457481384277, 104.88189268112183}, {266.45669555664062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 316 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 HBI} + VerticalPad + 0 + + + + Bounds + 
{{419.52756786346436, 76.535435199737549}, {130.39370727539062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 317 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 SBI} + VerticalPad + 0 + + + + Class + Group + Graphics + + + Bounds + {{487.55906867980957, 48.188978672027588}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 319 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{487.55906867980957, 28.346457958221436}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 320 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + Bounds + {{419.52756786346436, 59.527561664581299}, 
{130.39370727539062, 17.00787353515625}} + Class + ShapedGraphic + ID + 321 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 OS} + VerticalPad + 0 + + + + Bounds + {{419.5275707244873, 48.188978672027588}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 322 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{419.5275707244873, 28.346457958221436}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 323 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + ID + 318 + + + Class + Group + Graphics + + + Bounds + {{351.49607563018799, 48.18897819519043}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 325 + 
Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{351.49607563018799, 28.346457481384277}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 326 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + Bounds + {{283.46457481384277, 59.527561187744141}, {130.39370727539062, 17.00787353515625}} + Class + ShapedGraphic + ID + 327 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 OS} + VerticalPad + 0 + + + + Bounds + {{283.46457767486572, 48.18897819519043}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 328 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 
+ Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{283.46457767486572, 28.346457481384277}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 329 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + ID + 324 + + + Bounds + {{283.46457481384277, 87.874018669128418}, {266.45669555664062, 17.00787353515625}} + Class + ShapedGraphic + ID + 330 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Hypervisor} + VerticalPad + 0 + + + + Bounds + {{283.46457481384277, 76.535435676574707}, {130.39370727539062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 331 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} 
+{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 SBI} + VerticalPad + 0 + + + + ID + 312 + + + Class + Group + Graphics + + + Bounds + {{124.72441291809082, 116.22047662734985}, {130.39370727539062, 17.00787353515625}} + Class + ShapedGraphic + ID + 301 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Hardware} + VerticalPad + 0 + + + + Bounds + {{124.72441291809082, 104.88189268112183}, {130.39370727539062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 302 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 HAL} + VerticalPad + 0 + + + + Bounds + {{192.75591373443604, 48.188978672027588}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 303 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} 
+{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{192.75591373443604, 28.346457958221436}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 304 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + Bounds + {{124.72441291809082, 87.874019145965576}, {130.39370727539062, 17.00787353515625}} + Class + ShapedGraphic + ID + 305 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 SEE} + VerticalPad + 0 + + + + Bounds + {{124.72441291809082, 76.535436153411865}, {130.39370727539062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 306 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 SBI} + 
VerticalPad + 0 + + + + Bounds + {{124.72441291809082, 59.527561664581299}, {130.39370727539062, 17.00787353515625}} + Class + ShapedGraphic + ID + 307 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 OS} + VerticalPad + 0 + + + + Bounds + {{124.72441577911377, 48.188978672027588}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 308 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{124.72441577911377, 28.346457958221436}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 309 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + ID + 300 + + + Class + Group + Graphics + + + Bounds + {{28.346457481384277, 87.874019145965576}, {65.196853637695312, 17.00787353515625}} + Class + 
ShapedGraphic + ID + 295 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Hardware} + VerticalPad + 0 + + + + Bounds + {{28.346457481384277, 76.535435199737549}, {65.196853637695312, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 296 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 HAL} + VerticalPad + 0 + + + + Bounds + {{28.346454620361328, 59.527560710906982}, {65.196853637695312, 17.00787353515625}} + Class + ShapedGraphic + ID + 297 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 AEE} + VerticalPad + 0 + + + + Bounds + {{28.346457481384277, 48.188977718353271}, {65.196853637695312, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 298 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color 
+ + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{28.346457481384277, 28.346457004547119}, {65.196853637695312, 19.842521667480469}} + Class + ShapedGraphic + ID + 299 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + ID + 294 + + + GridInfo + + GridSpacing + 2.8346457481384277 + MajorGridSpacing + 10 + ShowsGrid + YES + SnapsToGrid + YES + + HPages + 1 + KeepToScale + + Layers + + + Lock + NO + Name + Layer 1 + Print + YES + View + YES + + + LayoutInfo + + Animate + NO + circoMinDist + 18 + circoSeparation + 0.0 + layoutEngine + dot + neatoSeparation + 0.0 + twopiSeparation + 0.0 + + Orientation + 2 + PrintOnePage + + RowAlign + 1 + RowSpacing + 36 + SheetTitle + halimps + UniqueID + 4 + VPages + 1 + + + ActiveLayerIndex + 0 + AutoAdjust + + BackgroundGraphic + + Bounds + {{0, 0}, {576, 733}} + Class + SolidGraphic + ID + 2 + Style + + shadow + + Draws + NO + + stroke + + Draws + NO + + + + BaseZoom + 0 + CanvasOrigin + {0, 0} + ColumnAlign + 1 + ColumnSpacing + 36 + DisplayScale + 1.000 cm = 1.000 cm + GraphicsList + + + Class + LineGraphic + ID + 351 + Points + + {385.51182174682617, 28.346457481384277} + {436.53544521331787, 28.346457481384277} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + 
Class + LineGraphic + ID + 350 + Points + + {407.99999885709235, 28.346457481384277} + {407.99999885709235, 56.692914962768555} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 351 + Position + 0.44074049592018127 + + + + Bounds + {{413.85827922821045, 36.850394725799561}, {26, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 349 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 User} + VerticalPad + 0 + + Wrap + NO + + + Class + LineGraphic + Head + + ID + 346 + Position + 0.44074049592018127 + + ID + 348 + Points + + {407.49999993953463, 82.204726696014404} + {407.99999885709235, 104.88189268112183} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 342 + Position + 0.4309411346912384 + + + + Bounds + {{414.86615371704102, 85.039372444152832}, {58, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 347 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Hypervisor} + VerticalPad + 0 + + Wrap + NO + + + Class + LineGraphic + ID + 346 + Points + + {385.51182174682617, 104.88189268112183} + {436.53544521331787, 104.88189268112183} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Class + 
LineGraphic + ID + 345 + Points + + {385.51182174682617, 56.692914962768555} + {436.53544521331787, 56.692914962768555} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Class + LineGraphic + Head + + ID + 342 + Position + 0.44074049592018127 + + ID + 344 + Points + + {408.18898773193359, 56.692914962768555} + {407.99999885709235, 82.204726696014404} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + + + Bounds + {{414.86615371704102, 62.36220645904541}, {58, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 343 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Supervisor} + VerticalPad + 0 + + Wrap + NO + + + Class + LineGraphic + ID + 342 + Points + + {385.51182174682617, 82.204726696014404} + {436.53544521331787, 82.204726696014404} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Class + LineGraphic + ID + 341 + Points + + {82.204726696014404, 79.370080947875977} + {82.204726696014404, 105.88189268112183} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + + + Bounds + {{18.015748977661133, 85.039372444152832}, {58, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 340 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} 
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Supervisor} + VerticalPad + 0 + + Wrap + NO + + + Bounds + {{49.043309688568115, 46.858269214630127}, {26, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 339 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 User} + VerticalPad + 0 + + Wrap + NO + + + Class + LineGraphic + Head + + ID + 336 + Position + 0.50578701496124268 + + ID + 338 + Points + + {82.00000073021161, 28.346457481384277} + {82.500001168391805, 79.370080947875977} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 335 + Position + 0.49598762392997742 + + + + Class + LineGraphic + ID + 337 + Points + + {56.692914962768555, 104.88189268112183} + {107.71653842926025, 104.88189268112183} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Class + LineGraphic + ID + 336 + Points + + {56.692914962768555, 79.370080947875977} + {107.71653842926025, 79.370080947875977} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Class + LineGraphic + ID + 335 + Points + + {56.692914962768555, 28.346457481384277} + {107.71653842926025, 28.346457481384277} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Bounds + {{338.01969623565674, 11.338582992553711}, {131, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 333 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + 
{\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 With Hypervisor Support} + VerticalPad + 0 + + Wrap + NO + + + Bounds + {{36.850394725799561, 11.338582992553711}, {111, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 332 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Classic Virtualization} + VerticalPad + 0 + + Wrap + NO + + + Bounds + {{249.44882297515869, 76.535435199737549}, {130.39370727539062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 317 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 SBI} + VerticalPad + 0 + + + + Class + Group + Graphics + + + Bounds + {{317.48032379150391, 48.188978672027588}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 319 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 
0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{317.48032379150391, 28.346457958221436}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 320 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + Bounds + {{249.44882297515869, 59.527561664581299}, {130.39370727539062, 17.00787353515625}} + Class + ShapedGraphic + ID + 321 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Guest OS} + VerticalPad + 0 + + + + Bounds + {{249.44882583618164, 48.188978672027588}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 322 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} 
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{249.44882583618164, 28.346457958221436}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 323 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + ID + 318 + + + Class + Group + Graphics + + + Bounds + {{181.41733074188232, 48.18897819519043}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 325 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{181.41733074188232, 28.346457481384277}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 326 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 
Application} + VerticalPad + 0 + + + + Bounds + {{113.38582992553711, 59.527561187744141}, {130.39370727539062, 17.00787353515625}} + Class + ShapedGraphic + ID + 327 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Guest OS} + VerticalPad + 0 + + + + Bounds + {{113.38583278656006, 48.18897819519043}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 328 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{113.38583278656006, 28.346457481384277}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 329 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + ID + 324 + + + Bounds + {{113.38582992553711, 87.874018669128418}, {266.45669555664062, 17.00787353515625}} + Class + 
ShapedGraphic + ID + 330 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Virtual Machine Monitor} + VerticalPad + 0 + + + + Bounds + {{113.38582992553711, 76.535435676574707}, {130.39370727539062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 331 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 SBI} + VerticalPad + 0 + + + + GridInfo + + GridSpacing + 2.8346457481384277 + MajorGridSpacing + 10 + ShowsGrid + YES + SnapsToGrid + YES + + HPages + 1 + KeepToScale + + Layers + + + Lock + NO + Name + Layer 1 + Print + YES + View + YES + + + LayoutInfo + + Animate + NO + circoMinDist + 18 + circoSeparation + 0.0 + layoutEngine + dot + neatoSeparation + 0.0 + twopiSeparation + 0.0 + + Orientation + 2 + PrintOnePage + + RowAlign + 1 + RowSpacing + 36 + SheetTitle + virtimps + UniqueID + 5 + VPages + 1 + + + ActiveLayerIndex + 0 + AutoAdjust + + BackgroundGraphic + + Bounds + {{0, 0}, {576, 733}} + Class + SolidGraphic + ID + 2 + Style + + shadow + + Draws + NO + + stroke + + Draws + NO + + + + BaseZoom + 0 + CanvasOrigin + {0, 0} + ColumnAlign + 1 + ColumnSpacing + 36 + DisplayScale + 1.000 cm = 1.000 cm + GraphicsList + + + Bounds + {{422.32284026200671, 
112.55512142181396}, {57, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 386 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 HAL Mode} + VerticalPad + 0 + + Wrap + NO + + + Class + LineGraphic + ID + 385 + Points + + {413.94883609820863, 107.71653842926025} + {413.85827922821045, 133.22834968566895} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + + + Class + LineGraphic + ID + 384 + Points + + {405.35434198379517, 133.2283501625061} + {456.37796545028687, 133.2283501625061} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Bounds + {{421.82284026200671, 89.877955436706543}, {58, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 376 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Hypervisor} + VerticalPad + 0 + + Wrap + NO + + + Class + LineGraphic + Head + + ID + 385 + + ID + 375 + Points + + {414.63385624865185, 82.204726696014404} + {413.94883609820863, 107.71653842926025} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 371 + Position + 0.18186701834201813 + + + + Class + LineGraphic + ID + 374 + Points + + {405.35434198379517, 107.71653842926025} + {456.37796545028687, 
107.71653842926025} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Bounds + {{421.82284026200671, 61.531497955322266}, {58, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 373 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Supervisor} + VerticalPad + 0 + + Wrap + NO + + + Class + LineGraphic + Head + + ID + 371 + Position + 0.1810183972120285 + + ID + 372 + Points + + {414.59055652364952, 53.858269214630127} + {414.59055652364952, 82.204726696014404} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + + + Class + LineGraphic + ID + 371 + Points + + {405.35434198379517, 82.204726696014404} + {456.37796545028687, 82.204726696014404} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Bounds + {{423.64961152131457, 33.185040473937988}, {26, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 370 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 User} + VerticalPad + 0 + + Wrap + NO + + + Class + LineGraphic + Head + + ID + 367 + Position + 0.1810183972120285 + + ID + 369 + Points + + {414.59055652364952, 28.346457481384277} + {414.59055652364952, 53.858269214630127} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + 
TailArrow + FilledArrow + + + Tail + + ID + 368 + Position + 0.1810183972120285 + + + + Class + LineGraphic + ID + 368 + Points + + {405.35434198379517, 28.346457481384277} + {456.37796545028687, 28.346457481384277} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Class + LineGraphic + ID + 367 + Points + + {405.35434198379517, 53.858269214630127} + {456.37796545028687, 53.858269214630127} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Bounds + {{189.88188891465563, 101.21653842926025}, {57, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 356 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 HAL Mode} + VerticalPad + 0 + + Wrap + NO + + + Class + LineGraphic + Head + + ID + 354 + Position + 0.1810183972120285 + + ID + 355 + Points + + {182.14960517629845, 96.377955436706543} + {182.14960517629845, 121.88976716995239} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + + + Class + LineGraphic + ID + 354 + Points + + {172.91339063644409, 121.88976716995239} + {223.93701410293579, 121.88976716995239} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Bounds + {{189.38188891465563, 75.704726696014404}, {58, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 353 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} 
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Supervisor} + VerticalPad + 0 + + Wrap + NO + + + Class + LineGraphic + Head + + ID + 351 + Position + 0.1810183972120285 + + ID + 352 + Points + + {182.14960517629845, 70.866143703460693} + {182.14960517629845, 96.377955436706543} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + + + Class + LineGraphic + ID + 351 + Points + + {172.91339063644409, 96.377955436706543} + {223.93701410293579, 96.377955436706543} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Bounds + {{191.20866017396349, 50.192914962768555}, {26, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 350 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 User} + VerticalPad + 0 + + Wrap + NO + + + Class + LineGraphic + Head + + ID + 347 + Position + 0.1810183972120285 + + ID + 349 + Points + + {182.14960517629845, 45.354331970214844} + {182.14960517629845, 70.866143703460693} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 348 + Position + 0.1810183972120285 + + + + Class + LineGraphic + ID + 348 + Points + + {172.91339063644409, 45.354331970214844} + {223.93701410293579, 45.354331970214844} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Class + LineGraphic + ID + 347 + Points + + {172.91339063644409, 70.866143703460693} + {223.93701410293579, 70.866143703460693} + + Style + + stroke + + HeadArrow + 0 + Legacy + + TailArrow + 0 + + + + + Class + Group + 
Graphics + + + Bounds + {{269.29134607315063, 116.2204761505127}, {130.39370727539062, 17.00787353515625}} + Class + ShapedGraphic + ID + 301 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Hardware} + VerticalPad + 0 + + + + Bounds + {{269.29134607315063, 104.88189220428467}, {130.39370727539062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 302 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 HAL} + VerticalPad + 0 + + + + Bounds + {{337.32284688949585, 48.18897819519043}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 303 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{337.32284688949585, 28.346457481384277}, 
{62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 304 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + Bounds + {{269.29134607315063, 87.874018669128418}, {130.39370727539062, 17.00787353515625}} + Class + ShapedGraphic + ID + 305 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 SEE} + VerticalPad + 0 + + + + Bounds + {{269.29134607315063, 76.535435676574707}, {130.39370727539062, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 306 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 SBI} + VerticalPad + 0 + + + + Bounds + {{269.29134607315063, 59.527561187744141}, {130.39370727539062, 17.00787353515625}} + Class + ShapedGraphic + ID + 307 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + 
Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 OS} + VerticalPad + 0 + + + + Bounds + {{269.29134893417358, 48.18897819519043}, {62.362205505371094, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 308 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{269.29134893417358, 28.346457481384277}, {62.362205505371094, 19.842521667480469}} + Class + ShapedGraphic + ID + 309 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + ID + 300 + + + Class + Group + Graphics + + + Bounds + {{99.21260404586792, 104.8818941116333}, {65.196853637695312, 17.00787353515625}} + Class + ShapedGraphic + ID + 295 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 
+\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Hardware} + VerticalPad + 0 + + + + Bounds + {{99.21260404586792, 93.543310165405273}, {65.196853637695312, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 296 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 HAL} + VerticalPad + 0 + + + + Bounds + {{99.212601184844971, 76.535435676574707}, {65.196853637695312, 17.00787353515625}} + Class + ShapedGraphic + ID + 297 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 AEE} + VerticalPad + 0 + + + + Bounds + {{99.21260404586792, 65.196852684020996}, {65.196853637695312, 11.338582992553711}} + Class + ShapedGraphic + FontInfo + + Color + + b + 1 + g + 1 + r + 1 + + + ID + 298 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + fill + + Color + + b + 0 + g + 0 + r + 0 + + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} 
+{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf1 ABI} + VerticalPad + 0 + + + + Bounds + {{99.21260404586792, 45.354331970214844}, {65.196853637695312, 19.842521667480469}} + Class + ShapedGraphic + ID + 299 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Rectangle + Style + + shadow + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Application} + VerticalPad + 0 + + + + ID + 294 + + + GridInfo + + GridSpacing + 2.8346457481384277 + MajorGridSpacing + 10 + ShowsGrid + YES + SnapsToGrid + YES + + HPages + 1 + KeepToScale + + Layers + + + Lock + NO + Name + Layer 1 + Print + YES + View + YES + + + LayoutInfo + + Animate + NO + circoMinDist + 18 + circoSeparation + 0.0 + layoutEngine + dot + neatoSeparation + 0.0 + twopiSeparation + 0.0 + + Orientation + 2 + PrintOnePage + + RowAlign + 1 + RowSpacing + 36 + SheetTitle + halmode + UniqueID + 6 + VPages + 1 + + + ActiveLayerIndex + 0 + AutoAdjust + + BackgroundGraphic + + Bounds + {{0, 0}, {576, 733}} + Class + SolidGraphic + ID + 2 + Style + + shadow + + Draws + NO + + stroke + + Draws + NO + + + + BaseZoom + 0 + CanvasOrigin + {0, 0} + ColumnAlign + 1 + ColumnSpacing + 36 + DisplayScale + 1.000 cm = 1.000 cm + GraphicsList + + + Class + Group + Graphics + + + Class + LineGraphic + Head + + ID + 199 + + ID + 187 + Points + + {138.99959134947824, 89.96540545224984} + {154.27881497886654, 72.988468551742528} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 192 + + + + Class + Group + Graphics + + + Bounds + {{70.866100000000003, 130.39400000000001}, {79.370000000000005, 
17.0078}} + Class + ShapedGraphic + ID + 189 + Shape + Bezier + ShapeData + + UnitPoints + + {0.5, -0.499998} + {0.5, -0.499998} + {0.28571400000000002, 0.5} + {0.28571400000000002, 0.5} + {0.28571400000000002, 0.50000199999999995} + {-0.5, 0.50000199999999995} + {-0.5, 0.50000199999999995} + {-0.50000100000000003, 0.50000199999999995} + {-0.25000099999999997, -0.499998} + {-0.25000099999999997, -0.499998} + {-0.25000099999999997, -0.499998} + {0.5, -0.499998} + + + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + + + Bounds + {{95.800600000000003, 96.724400000000003}, {48.766199999999998, 28}} + Class + ShapedGraphic + FitText + Vertical + Flow + Resize + ID + 190 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Host Console} + VerticalPad + 0 + + + + Bounds + {{93.543199999999999, 93.543400000000005}, {53.858400000000003, 34.015700000000002}} + Class + ShapedGraphic + ID + 191 + Shape + Rectangle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + + + Bounds + {{90.708600000000004, 90.708699999999993}, {59.5276, 39.685000000000002}} + Class + ShapedGraphic + ID + 192 + Shape + Rectangle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + + + ID + 188 + + + Bounds + {{36.8504, 24.3018}, {48.766199999999998, 28}} + Class + ShapedGraphic + FitText + Vertical + Flow + Resize + ID + 193 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} 
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Host Internet} + VerticalPad + 0 + + + + Class + LineGraphic + ID + 194 + Points + + {127.87752769763155, 48.920315915501725} + {96.378, 45.354300000000002} + {62.362200000000001, 70.866100000000003} + {36.8504, 51.023600000000002} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + LineType + 1 + TailArrow + FilledArrow + + + Tail + + ID + 199 + + + + Bounds + {{227.79499999999999, 49.6922}, {58.503900000000002, 28}} + Class + ShapedGraphic + FitText + Vertical + Flow + Resize + ID + 195 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Proxied Devices} + VerticalPad + 0 + + + + Class + LineGraphic + Head + + ID + 211 + + ID + 196 + Points + + {206.27095305244924, 66.867379451305894} + {327.96441084459553, 112.3920954368151} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 199 + + + + Class + LineGraphic + Head + + ID + 198 + + ID + 197 + Points + + {179.0535685886594, 74.371575311247966} + {191.33879999999999, 107.71700000000003} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 199 + + + + Bounds + {{161.57499999999999, 107.717}, {59.5276, 42.5197}} + Class + ShapedGraphic + ID + 198 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Cylinder + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} 
+{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Host File System} + VerticalPad + 0 + + + + Bounds + {{127.559, 34.015799999999999}, {87.873999999999995, 39.685000000000002}} + Class + ShapedGraphic + ID + 199 + Shape + Circle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Front-End Server} + + + + Bounds + {{85.039400000000001, 28.345800000000001}, {141.732, 141.733}} + Class + ShapedGraphic + ID + 200 + Shape + Rectangle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Align + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural + +\f0\fs24 \cf0 Front-End Host} + + TextPlacement + 2 + + + Class + Group + Graphics + + + Class + LineGraphic + Head + + ID + 211 + + ID + 202 + Points + + {390.30781664132661, 79.588696080972056} + {377.83555886873359, 107.02726799111332} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 203 + + + + Bounds + {{377.00799999999998, 51.021799999999999}, {39.685000000000002, 28.346399999999999}} + Class + ShapedGraphic + ID + 203 + Shape + Circle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} 
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 App +\i n} + + + + Class + LineGraphic + Head + + ID + 211 + + ID + 204 + Points + + {363.87953741624483, 60.506779894173953} + {367.06667801810136, 106.72256740718751} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 205 + + + + Bounds + {{342.99200000000002, 31.1798}, {39.685000000000002, 28.346399999999999}} + Class + ShapedGraphic + ID + 205 + Shape + Circle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 App 2} + + + + Bounds + {{407.77199999999999, 120.55800000000001}, {59.5276, 14}} + Class + ShapedGraphic + FitText + Vertical + Flow + Resize + ID + 206 + Rotation + 270 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Supervisor} + VerticalPad + 0 + + + + Bounds + {{424.53500000000003, 55.360799999999998}, {26, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 207 + Rotation + 270 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} 
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 User} + VerticalPad + 0 + + Wrap + NO + + + AllowConnections + NO + AllowToConnect + + Class + LineGraphic + ID + 208 + Points + + {291.96800000000002, 90.707800000000006} + {447.87400000000002, 90.706800000000001} + + Style + + stroke + + HeadArrow + 0 + Legacy + + Pattern + 1 + TailArrow + 0 + + + + + Class + LineGraphic + Head + + ID + 211 + + ID + 209 + Points + + {333.608921579374, 81.031009447329296} + {353.46389754346319, 107.5045291946813} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 210 + + + + Bounds + {{297.63799999999998, 48.187800000000003}, {48.189, 34.015700000000002}} + Class + ShapedGraphic + ID + 210 + Shape + Circle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 App 1} + + + + Bounds + {{308.976, 107.715}, {119.05500000000001, 39.685000000000002}} + Class + ShapedGraphic + ID + 211 + Shape + Circle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 OS Kernel} + + + + Bounds + {{291.96800000000002, 28.345800000000001}, {155.90600000000001, 141.733}} + Class + ShapedGraphic + ID + 212 + Shape + Rectangle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Align + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 
+\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural + +\f0\fs24 \cf0 Target System} + + TextPlacement + 2 + + + ID + 201 + + + ID + 186 + + + GridInfo + + GridSpacing + 2.8346457481384277 + MajorGridSpacing + 10 + ShowsGrid + YES + SnapsToGrid + YES + + HPages + 1 + KeepToScale + + Layers + + + Lock + NO + Name + Layer 1 + Print + YES + View + YES + + + LayoutInfo + + Animate + NO + circoMinDist + 18 + circoSeparation + 0.0 + layoutEngine + dot + neatoSeparation + 0.0 + twopiSeparation + 0.0 + + Orientation + 2 + PrintOnePage + + RowAlign + 1 + RowSpacing + 36 + SheetTitle + Canvas 1 + UniqueID + 1 + VPages + 1 + + + ActiveLayerIndex + 0 + AutoAdjust + + BackgroundGraphic + + Bounds + {{0, 0}, {576, 733}} + Class + SolidGraphic + ID + 2 + Style + + shadow + + Draws + NO + + stroke + + Draws + NO + + + + BaseZoom + 0 + CanvasOrigin + {0, 0} + ColumnAlign + 1 + ColumnSpacing + 36 + DisplayScale + 1.000 cm = 1.000 cm + GraphicsList + + + Class + LineGraphic + Head + + ID + 211 + + ID + 196 + Points + + {237.01191575496489, 159.40188656382526} + {328.77471756498289, 153.84373772283652} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 224 + + + + Class + Group + Graphics + + + Class + LineGraphic + Head + + ID + 224 + + ID + 214 + Points + + {155.4630549800203, 112.59856659898557} + {177.53853765660827, 123.27929436596781} + {185.85024239569299, 141.99974457051832} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 216 + + + + Class + Group + Graphics + + + Bounds + {{97.123989590536311, 95.488724740583336}, {80.414446750150688, 16.674312563926623}} + Class + ShapedGraphic + ID + 216 + Shape + Bezier + ShapeData + + UnitPoints + + {0.5, -0.499998} + {0.5, -0.499998} + {0.28571400000000002, 0.5} + 
{0.28571400000000002, 0.5} + {0.28571400000000002, 0.50000199999999995} + {-0.5, 0.50000199999999995} + {-0.5, 0.50000199999999995} + {-0.50000100000000003, 0.50000199999999995} + {-0.25000099999999997, -0.499998} + {-0.25000099999999997, -0.499998} + {-0.25000099999999997, -0.499998} + {0.5, -0.499998} + + + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + + + Bounds + {{122.38660799159003, 62.479315275374205}, {49.40792482181174, 27.450978479870734}} + Class + ShapedGraphic + ID + 217 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Host Console} + VerticalPad + 0 + + + + Bounds + {{120.09950238375592, 59.360688041643172}, {54.567134167170416, 33.348723167062104}} + Class + ShapedGraphic + ID + 218 + Shape + Rectangle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + + + Bounds + {{117.22760127807399, 56.581570588182828}, {60.310936378534329, 38.906860034773935}} + Class + ShapedGraphic + ID + 219 + Shape + Rectangle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + + + ID + 215 + + + Bounds + {{88.508415837161337, 124.87201866991595}, {49.40792482181174, 27.450978479870734}} + Class + ShapedGraphic + ID + 220 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Host Internet} + VerticalPad + 0 + + + + Class + 
LineGraphic + ID + 221 + Points + + {152.40962935873975, 159.42687589708657} + {105.739831958026, 156.62806555034143} + {82.764252311386855, 162.18619408107037} + {39.685040473938457, 151.0699370196125} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + LineType + 1 + TailArrow + FilledArrow + + + Tail + + ID + 224 + + + + Class + LineGraphic + Head + + ID + 223 + + ID + 222 + Points + + {201.55001396003507, 141.80154983485838} + {216.30978910442363, 98.267729485454879} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 224 + + + + Bounds + {{186.15432091515646, 56.581751997220621}, {60.310936378534329, 41.685977488234272}} + Class + ShapedGraphic + ID + 223 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Shape + Cylinder + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Host File System} + VerticalPad + 0 + + + + Bounds + {{153.0708703994751, 142.51046784608545}, {83.286538540509753, 38.906860034773935}} + Class + ShapedGraphic + ID + 224 + Shape + Circle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Front-End Server} + + + + Bounds + {{62.660613001439756, 51.023623466491699}, {195.29214292660023, 141.73228454589844}} + Class + ShapedGraphic + ID + 225 + Shape + Rectangle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Align + 0 + Text + 
{\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural + +\f0\fs24 \cf0 Front-End Host} + + TextPlacement + 2 + + + ID + 213 + + + Bounds + {{250.47216598510741, 129.06228094787599}, {58.503900000000002, 14}} + Class + ShapedGraphic + FitText + Vertical + Flow + Resize + ID + 195 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 HTIF} + VerticalPad + 0 + + + + Class + Group + Graphics + + + Class + LineGraphic + Head + + ID + 211 + + ID + 202 + Points + + {410.15087069815746, 102.26652483339915} + {397.67865936434953, 129.70509591954524} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 203 + + + + Bounds + {{396.85103229522696, 73.699623466491701}, {39.685000000000002, 28.346399999999999}} + Class + ShapedGraphic + ID + 203 + Shape + Circle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 App +\i n} + + + + Class + LineGraphic + Head + + ID + 211 + + ID + 204 + Points + + {383.72256959795283, 83.184603364385737} + {386.90970985285782, 129.40039087797282} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + 
ID + 205 + + + + Bounds + {{362.83503229522699, 53.85762346649171}, {39.685000000000002, 28.346399999999999}} + Class + ShapedGraphic + ID + 205 + Shape + Circle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 App 2} + + + + Bounds + {{427.61503229522697, 143.23582346649169}, {59.5276, 14}} + Class + ShapedGraphic + FitText + Vertical + Flow + Resize + ID + 206 + Rotation + 270 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 Supervisor} + VerticalPad + 0 + + + + Bounds + {{444.378032295227, 78.0386234664917}, {26, 14}} + Class + ShapedGraphic + FitText + YES + Flow + Resize + ID + 207 + Rotation + 270 + Shape + Rectangle + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + Draws + NO + + + Text + + Pad + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 User} + VerticalPad + 0 + + Wrap + NO + + + AllowConnections + NO + AllowToConnect + + Class + LineGraphic + ID + 208 + Points + + {311.81103229522699, 113.38562346649171} + {467.717032295227, 113.3846234664917} + + Style + + stroke + + HeadArrow + 0 + Legacy + + Pattern + 1 + TailArrow + 0 + + + + + 
Class + LineGraphic + Head + + ID + 211 + + ID + 209 + Points + + {353.4519001228449, 103.70885381694048} + {373.30675745839966, 130.18237015425552} + + Style + + stroke + + HeadArrow + FilledArrow + Legacy + + TailArrow + FilledArrow + + + Tail + + ID + 210 + + + + Bounds + {{317.48103229522695, 70.865623466491712}, {48.189, 34.015700000000002}} + Class + ShapedGraphic + ID + 210 + Shape + Circle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 App 1} + + + + Bounds + {{328.81903229522698, 130.39282346649171}, {119.05500000000001, 39.685000000000002}} + Class + ShapedGraphic + ID + 211 + Shape + Circle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc + +\f0\fs24 \cf0 OS Kernel} + + + + Bounds + {{311.81103229522699, 51.023623466491699}, {155.90600000000001, 141.733}} + Class + ShapedGraphic + ID + 212 + Shape + Rectangle + Style + + shadow + + Draws + NO + + stroke + + Width + 2 + + + Text + + Align + 0 + Text + {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 +\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural + +\f0\fs24 \cf0 Target System} + + TextPlacement + 2 + + + ID + 201 + + + GridInfo + + GridSpacing + 2.8346457481384277 + MajorGridSpacing + 10 + ShowsGrid + YES + SnapsToGrid + YES + + HPages + 1 + KeepToScale + + 
Layers + + + Lock + NO + Name + Layer 1 + Print + YES + View + YES + + + LayoutInfo + + Animate + NO + circoMinDist + 18 + circoSeparation + 0.0 + layoutEngine + dot + neatoSeparation + 0.0 + twopiSeparation + 0.0 + + Orientation + 2 + PrintOnePage + + RowAlign + 1 + RowSpacing + 36 + SheetTitle + Canvas 2 + UniqueID + 2 + VPages + 1 + + + SmartAlignmentGuidesActive + YES + SmartDistanceGuidesActive + YES + UseEntirePage + + WindowInfo + + CurrentSheet + 3 + ExpandedCanvases + + Frame + {{4, 0}, {1362, 746}} + ListView + + OutlineWidth + 142 + RightSidebar + + ShowRuler + + Sidebar + + SidebarWidth + 120 + VisibleRegion + {{-19, 0}, {613.5, 303.5}} + Zoom + 2 + ZoomValues + + + Canvas 1 + 2 + 8 + + + Canvas 2 + 2 + 1 + + + privimps + 2 + 1 + + + halimps + 2 + 1 + + + virtimps + 2 + 1 + + + halmode + 2 + 1 + + + + + diff --git a/src/history.tex b/src/history.tex new file mode 100644 index 0000000..8305c8d --- /dev/null +++ b/src/history.tex @@ -0,0 +1,248 @@ +\newpage +\chapter{History and Acknowledgments} +\label{history} + +\section{History from Revision 1.0 of ISA manual} + +The RISC-V ISA and instruction set manual builds upon several earlier +projects. Several aspects of the supervisor-level machine and the +overall format of the manual date back to the T0 (Torrent-0) vector +microprocessor project at UC Berkeley and ICSI, begun in 1992. T0 was +a vector processor based on the MIPS-II ISA, with Krste Asanovi\'{c} +as main architect and RTL designer, and Brian Kingsbury and Bertrand +Irissou as principal VLSI implementors. David Johnson at ICSI was a +major contributor to the T0 ISA design, particularly supervisor mode, +and to the manual text. John Hauser also provided considerable +feedback on the T0 ISA design. + +The Scale (Software-Controlled Architecture for Low Energy) project at +MIT, begun in 2000, built upon the T0 project infrastructure, refined +the supervisor-level interface, and moved away from the MIPS scalar +ISA by dropping the branch delay slot. 
Ronny Krashinsky and +Christopher Batten were the principal architects of the Scale +Vector-Thread processor at MIT, while Mark Hampton ported the +GCC-based compiler infrastructure and tools for Scale. + +A lightly edited version of the T0 MIPS scalar processor specification +(MIPS-6371) was used in teaching a new version of the MIT 6.371 +Introduction to VLSI Systems class in the Fall 2002 semester, with +Chris Terman and Krste Asanovi\'{c} as lecturers. Chris Terman +contributed most of the lab material for the class (there was no +TA!). The 6.371 class evolved into the trial 6.884 Complex Digital +Design class at MIT, taught by Arvind and Krste Asanovi\'{c} in Spring +2005, which became a regular Spring class 6.375. A reduced version of +the Scale MIPS-based scalar ISA, named SMIPS, was used in 6.884/6.375. +Christopher Batten was the TA for the early offerings of these classes +and developed a considerable amount of documentation and lab material +based around the SMIPS ISA. This same SMIPS lab material was adapted +and enhanced by TA Yunsup Lee for the UC Berkeley Fall 2009 CS250 VLSI +Systems Design class taught by John Wawrzynek, Krste Asanovi\'{c}, and +John Lazzaro. + +The Maven (Malleable Array of Vector-thread ENgines) project was a +second-generation vector-thread architecture. Its design was led by +Christopher Batten when he was an Exchange Scholar at UC Berkeley starting +in summer 2007. Hidetaka Aoki, a visiting industrial fellow from +Hitachi, gave considerable feedback on the early Maven ISA and +microarchitecture design. The Maven infrastructure was based on the +Scale infrastructure but the Maven ISA moved further away from the +MIPS ISA variant defined in Scale, with a unified floating-point and +integer register file. Maven was designed to support experimentation +with alternative data-parallel accelerators. 
Yunsup Lee was the main +implementor of the various Maven vector units, while Rimas Avi\v{z}ienis +was the main implementor of the various Maven scalar units. +Yunsup Lee and Christopher Batten ported GCC to work with the new +Maven ISA. Christopher Celio provided the initial definition of a +traditional vector instruction set (``Flood'') variant of Maven. + +Based on experience with all these previous projects, the RISC-V ISA +definition was begun in Summer 2010. An initial version of the RISC-V +32-bit instruction subset was used in the UC Berkeley Fall 2010 CS250 +VLSI Systems Design class, with Yunsup Lee as TA. RISC-V is a clean +break from the earlier MIPS-inspired designs. John Hauser contributed +to the floating-point ISA definition, including the sign-injection +instructions and a register encoding scheme that permits +internal recoding of floating-point values. + +\section{History from Revision 2.0 of ISA manual} + +Multiple implementations of RISC-V processors have been completed, +including several silicon fabrications, as shown in +Table~\ref{silicon}. 
+ +\begin{table*}[!h] +\begin{center} +\begin{tabular}{|l|r|l|l|} +\hline +\multicolumn{1}{|c|}{Name} & \multicolumn{1}{|c|}{Tapeout Date} & \multicolumn{1}{|c|}{Process} & \multicolumn{1}{|c|}{ISA} \\ \hline +\hline +Raven-1 & May 29, 2011 & ST 28nm FDSOI & RV64G1\_Xhwacha1 \\ \hline +EOS14 & April 1, 2012 & IBM 45nm SOI & RV64G1p1\_Xhwacha2 \\ \hline +EOS16 & August 17, 2012 & IBM 45nm SOI & RV64G1p1\_Xhwacha2 \\ \hline +Raven-2 & August 22, 2012 & ST 28nm FDSOI & RV64G1p1\_Xhwacha2 \\ \hline +EOS18 & February 6, 2013 & IBM 45nm SOI & RV64G1p1\_Xhwacha2 \\ \hline +EOS20 & July 3, 2013 & IBM 45nm SOI & RV64G1p99\_Xhwacha2 \\ \hline +Raven-3 & September 26, 2013 & ST 28nm FDSOI & RV64G1p99\_Xhwacha2 \\ \hline +EOS22 & March 7, 2014 & IBM 45nm SOI & RV64G1p9999\_Xhwacha3 \\ \hline +\end{tabular} +\end{center} +\vspace{-0.15in} +\caption{Fabricated RISC-V testchips.} +\label{silicon} +\end{table*} + +The first RISC-V processors to be fabricated were written in Verilog and +manufactured in a pre-production \wunits{28}{nm} FDSOI technology from +ST as the Raven-1 testchip in 2011. Two cores were developed by Yunsup +Lee and Andrew Waterman, advised by Krste Asanovi\'{c}, and fabricated +together: 1) an RV64 scalar core with error-detecting flip-flops, and 2) +an RV64 core with an attached 64-bit floating-point vector unit. The +first microarchitecture was informally known as ``TrainWreck'', due to +the short time available to complete the design with immature design +libraries. + +Subsequently, a clean microarchitecture for an in-order decoupled RV64 +core was developed by Andrew Waterman, Rimas Avi\v{z}ienis, and Yunsup +Lee, advised by Krste Asanovi\'{c}, and, continuing the railway theme, +was codenamed ``Rocket'' after George Stephenson's successful steam +locomotive design. Rocket was written in Chisel, a new hardware +design language developed at UC Berkeley. 
The IEEE floating-point +units used in Rocket were developed by John Hauser, Andrew +Waterman, and Brian Richards. +Rocket has since been refined and developed further, and has been +fabricated two more times in \wunits{28}{nm} FDSOI (Raven-2, Raven-3), +and five times in IBM \wunits{45}{nm} SOI technology (EOS14, EOS16, +EOS18, EOS20, EOS22) for a photonics project. Work is ongoing to make +the Rocket design available as a parameterized RISC-V processor +generator. + +EOS14--EOS22 chips include early versions of Hwacha, a 64-bit IEEE +floating-point vector unit, developed by Yunsup Lee, Andrew Waterman, +Huy Vo, Albert Ou, Quan Nguyen, and Stephen Twigg, advised by Krste +Asanovi\'{c}. EOS16--EOS22 chips include dual cores with a +cache-coherence protocol developed by Henry Cook and Andrew Waterman, +advised by Krste Asanovi\'{c}. EOS14 silicon has successfully run at +\wunits{1.25}{GHz}. EOS16 silicon suffered from a bug in the IBM pad +libraries. EOS18 and EOS20 have successfully run at \wunits{1.35}{GHz}. + +Contributors to the Raven testchips include Yunsup Lee, Andrew Waterman, +Rimas Avi\v{z}ienis, Brian Zimmer, Jaehwa Kwak, Ruzica Jevti\'{c}, +Milovan Blagojevi\'{c}, Alberto Puggelli, Steven Bailey, Ben Keller, +Pi-Feng Chiu, Brian Richards, Borivoje Nikoli\'{c}, and Krste +Asanovi\'{c}. + +Contributors to the EOS testchips include Yunsup Lee, Rimas +Avi\v{z}ienis, Andrew Waterman, Henry Cook, Huy Vo, Daiwei Li, Chen Sun, +Albert Ou, Quan Nguyen, Stephen Twigg, Vladimir Stojanovi\'{c}, and +Krste Asanovi\'{c}. + +Andrew Waterman and Yunsup Lee developed the C++ ISA simulator +``Spike'', used as a golden model in development and named after the +golden spike used to celebrate completion of the US transcontinental +railway. Spike has been made available as a BSD open-source project. + +Andrew Waterman completed a Master's thesis with a preliminary design +of the RISC-V compressed instruction set~\cite{waterman-ms}. 
+ +Various FPGA implementations of the RISC-V have been completed, +primarily as part of integrated demos for the Par Lab project research +retreats. The largest FPGA design has 3 cache-coherent RV64IMA +processors running a research operating system. Contributors to the +FPGA implementations include Andrew Waterman, Yunsup Lee, Rimas +Avi\v{z}ienis, and Krste Asanovi\'{c}. + +RISC-V processors have been used in several classes at UC Berkeley. +Rocket was used in the Fall 2011 offering of CS250 as a basis for class +projects, with Brian Zimmer as TA. For the undergraduate CS152 class in +Spring 2012, Christopher Celio used Chisel to write a suite of educational +RV32 processors, named ``Sodor'' after the island on which ``Thomas the +Tank Engine'' and friends live. The suite includes a microcoded core, +an unpipelined core, and 2, 3, and 5-stage pipelined cores, and is +publicly available under a BSD license. The suite was subsequently +updated and used again in CS152 in Spring 2013, with Yunsup Lee as TA, +and in Spring 2014, with Eric Love as TA. +Christopher Celio also developed an out-of-order RV64 design known as BOOM +(Berkeley Out-of-Order Machine), with accompanying pipeline +visualizations, that was used in the CS152 classes. The CS152 classes +also used cache-coherent versions of the Rocket core developed by Andrew +Waterman and Henry Cook. + +Over the summer of 2013, the RoCC (Rocket Custom Coprocessor) +interface was defined to simplify adding custom accelerators to the +Rocket core. Rocket and the RoCC interface were used extensively in +the Fall 2013 CS250 VLSI class taught by Jonathan Bachrach, with +several student accelerator projects built to the RoCC interface. The +Hwacha vector unit has been rewritten as a RoCC coprocessor. + +Two Berkeley undergraduates, Quan Nguyen and Albert Ou, +successfully ported Linux to run on RISC-V in Spring 2013. + +Colin Schmidt successfully completed an LLVM backend for RISC-V 2.0 in +January 2014. 
+ +Darius Rad at Bluespec contributed soft-float ABI support to the GCC port in +March 2014. + +John Hauser contributed the definition of the floating-point classification +instructions. + +We are aware of several other RISC-V core implementations, including +one in Verilog by Tommy Thorn, and one in Bluespec by Rishiyur Nikhil. + +\section*{Acknowledgments} + +Thanks to Christopher F. Batten, Preston Briggs, Christopher Celio, David +Chisnall, Stefan Freudenberger, John Hauser, Ben Keller, Rishiyur +Nikhil, Michael Taylor, Tommy Thorn, and Robert Watson for comments on +the draft ISA version 2.0 specification. + +\section{History for Revision 2.1} + +Uptake of the RISC-V ISA has been very rapid since the introduction of +the frozen version 2.0 in May 2014, with too much activity to record +in a short history section such as this. Perhaps the most important +single event was the formation of the non-profit RISC-V Foundation in +August 2015. The Foundation will now take over stewardship of the +official RISC-V ISA standard, and the official website {\tt riscv.org} +is the best place to obtain news and updates on the RISC-V standard. + +\section*{Acknowledgments} + +Thanks to Scott Beamer, Allen J. Baum, Christopher Celio, David Chisnall, +Paul Clayton, Palmer Dabbelt, Jan Gray, Michael Hamburg, and John +Hauser for comments on the version 2.0 specification. + +\section{History for Revision 2.2} + + +\section*{Acknowledgments} + +Thanks to Alex Bradbury and David Horner for comments on the version 2.1 +specification. + +\section{Funding} + +Development of the RISC-V architecture and implementations has been +partially funded by the following sponsors. +\begin{itemize} + +\item {\bf Par Lab:} Research supported by Microsoft (Award \#024263) and Intel (Award + \#024894) funding and by matching funding by U.C. Discovery + (Award \#DIG07-10227). Additional support came from Par Lab + affiliates Nokia, NVIDIA, Oracle, and Samsung. 
+ +\item {\bf Project Isis:} DoE Award DE-SC0003624. + +\item {\bf ASPIRE Lab:} DARPA PERFECT program, Award + HR0011-12-2-0016. DARPA POEM program, Award HR0011-11-C-0100. The + Center for Future Architectures Research (C-FAR), a STARnet center + funded by the Semiconductor Research Corporation. Additional + support from ASPIRE industrial sponsor, Intel, and ASPIRE + affiliates, Google, Hewlett Packard Enterprise, Huawei, Nokia, + NVIDIA, Oracle, and Samsung. + +\end{itemize} + +The content of this paper does not necessarily reflect the position or the +policy of the US government and no official endorsement should be +inferred. diff --git a/src/hypervisor.tex b/src/hypervisor.tex new file mode 100644 index 0000000..03b52d2 --- /dev/null +++ b/src/hypervisor.tex @@ -0,0 +1,11 @@ +\chapter{Hypervisor-Level ISA} +\label{hypervisor} + +This chapter is a placeholder for a future RISC-V hypervisor-level +common core specification. + +\begin{commentary} +The privileged architecture is designed to simplify the use of classic +virtualization techniques, where a guest OS is run at user-level, as +the few privileged instructions can be easily detected and trapped. 
+\end{commentary} diff --git a/src/instr-table.tex b/src/instr-table.tex new file mode 100644 index 0000000..90b09cd --- /dev/null +++ b/src/instr-table.tex @@ -0,0 +1,1958 @@ + +\newpage + +\begin{table}[p] +\begin{small} +\begin{center} +\begin{tabular}{p{0in}p{0.4in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.4in}p{0.6in}p{0.4in}p{0.6in}p{0.7in}l} +& & & & & & & & & & \\ + & +\multicolumn{1}{l}{\instbit{31}} & +\multicolumn{1}{r}{\instbit{27}} & +\instbit{26} & +\instbit{25} & +\multicolumn{1}{l}{\instbit{24}} & +\multicolumn{1}{r}{\instbit{20}} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{funct7} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & R-type \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & I-type \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[11:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{opcode} & S-type \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[12$\vert$10:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{imm[4:1$\vert$11]} & +\multicolumn{1}{c|}{opcode} & SB-type \\ +\cline{2-11} + + +& +\multicolumn{8}{|c|}{imm[31:12]} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & U-type \\ +\cline{2-11} + + +& +\multicolumn{8}{|c|}{imm[20$\vert$10:1$\vert$11$\vert$19:12]} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & UJ-type \\ +\cline{2-11} + + +& +\multicolumn{10}{c}{} & \\ +& +\multicolumn{10}{c}{\bf RV32I Base Instruction Set} & \\ +\cline{2-11} + + +& +\multicolumn{8}{|c|}{imm[31:12]} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110111} 
& LUI \\ +\cline{2-11} + + +& +\multicolumn{8}{|c|}{imm[31:12]} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0010111} & AUIPC \\ +\cline{2-11} + + +& +\multicolumn{8}{|c|}{imm[20$\vert$10:1$\vert$11$\vert$19:12]} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1101111} & JAL \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1100111} & JALR \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[12$\vert$10:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{imm[4:1$\vert$11]} & +\multicolumn{1}{c|}{1100011} & BEQ \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[12$\vert$10:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{imm[4:1$\vert$11]} & +\multicolumn{1}{c|}{1100011} & BNE \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[12$\vert$10:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{100} & +\multicolumn{1}{c|}{imm[4:1$\vert$11]} & +\multicolumn{1}{c|}{1100011} & BLT \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[12$\vert$10:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{imm[4:1$\vert$11]} & +\multicolumn{1}{c|}{1100011} & BGE \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[12$\vert$10:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{110} & +\multicolumn{1}{c|}{imm[4:1$\vert$11]} & +\multicolumn{1}{c|}{1100011} & BLTU \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[12$\vert$10:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{111} & +\multicolumn{1}{c|}{imm[4:1$\vert$11]} & +\multicolumn{1}{c|}{1100011} & BGEU \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & 
+\multicolumn{1}{c|}{0000011} & LB \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0000011} & LH \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0000011} & LW \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{100} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0000011} & LBU \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0000011} & LHU \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[11:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{0100011} & SB \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[11:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{0100011} & SH \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[11:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{0100011} & SW \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0010011} & ADDI \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0010011} & SLTI \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0010011} & SLTIU \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & 
+\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{100} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0010011} & XORI \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{110} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0010011} & ORI \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{111} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0010011} & ANDI \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{shamt} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0010011} & SLLI \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{shamt} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0010011} & SRLI \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0100000} & +\multicolumn{2}{c|}{shamt} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0010011} & SRAI \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & ADD \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0100000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & SUB \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & SLL \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & SLT \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{rs2} & 
+\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & SLTU \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{100} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & XOR \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & SRL \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0100000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & SRA \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{110} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & OR \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{111} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & AND \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{0000} & +\multicolumn{3}{c|}{pred} & +\multicolumn{1}{c|}{succ} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{0001111} & FENCE \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{0000} & +\multicolumn{3}{c|}{0000} & +\multicolumn{1}{c|}{0000} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{0001111} & FENCE.I \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{000000000000} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{1110011} & ECALL \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{000000000001} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{1110011} & EBREAK \\ 
+\cline{2-11} + + +& +\multicolumn{6}{|c|}{csr} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1110011} & CSRRW \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{csr} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1110011} & CSRRS \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{csr} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1110011} & CSRRC \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{csr} & +\multicolumn{1}{c|}{zimm} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1110011} & CSRRWI \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{csr} & +\multicolumn{1}{c|}{zimm} & +\multicolumn{1}{c|}{110} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1110011} & CSRRSI \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{csr} & +\multicolumn{1}{c|}{zimm} & +\multicolumn{1}{c|}{111} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1110011} & CSRRCI \\ +\cline{2-11} + + +\end{tabular} +\end{center} +\end{small} + +\label{instr-table} +\end{table} + + +\newpage + +\begin{table}[p] +\begin{small} +\begin{center} +\begin{tabular}{p{0in}p{0.4in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.4in}p{0.6in}p{0.4in}p{0.6in}p{0.7in}l} +& & & & & & & & & & \\ + & +\multicolumn{1}{l}{\instbit{31}} & +\multicolumn{1}{r}{\instbit{27}} & +\instbit{26} & +\instbit{25} & +\multicolumn{1}{l}{\instbit{24}} & +\multicolumn{1}{r}{\instbit{20}} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{funct7} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & R-type \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & 
+\multicolumn{1}{c|}{opcode} & I-type \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[11:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{opcode} & S-type \\ +\cline{2-11} + + +& +\multicolumn{10}{c}{} & \\ +& +\multicolumn{10}{c}{\bf RV64I Base Instruction Set (in addition to RV32I)} & \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{110} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0000011} & LWU \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0000011} & LD \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[11:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{0100011} & SD \\ +\cline{2-11} + + +& +\multicolumn{3}{|c|}{000000} & +\multicolumn{3}{c|}{shamt} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0010011} & SLLI \\ +\cline{2-11} + + +& +\multicolumn{3}{|c|}{000000} & +\multicolumn{3}{c|}{shamt} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0010011} & SRLI \\ +\cline{2-11} + + +& +\multicolumn{3}{|c|}{010000} & +\multicolumn{3}{c|}{shamt} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0010011} & SRAI \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0011011} & ADDIW \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{shamt} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0011011} & SLLIW \\ +\cline{2-11} + + +& 
+\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{shamt} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0011011} & SRLIW \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0100000} & +\multicolumn{2}{c|}{shamt} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0011011} & SRAIW \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0111011} & ADDW \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0100000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0111011} & SUBW \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0111011} & SLLW \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0111011} & SRLW \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0100000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0111011} & SRAW \\ +\cline{2-11} + + +& +\multicolumn{10}{c}{} & \\ +& +\multicolumn{10}{c}{\bf RV32M Standard Extension} & \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & MUL \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & MULH \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & 
+\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & MULHSU \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & MULHU \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{100} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & DIV \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & DIVU \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{110} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & REM \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{111} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0110011} & REMU \\ +\cline{2-11} + + +& +\multicolumn{10}{c}{} & \\ +& +\multicolumn{10}{c}{\bf RV64M Standard Extension (in addition to RV32M)} & \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0111011} & MULW \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{100} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0111011} & DIVW \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{101} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0111011} & DIVUW \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & 
+\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{110} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0111011} & REMW \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{111} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0111011} & REMUW \\ +\cline{2-11} + + +& +\multicolumn{10}{c}{} & \\ +& +\multicolumn{10}{c}{\bf RV32A Standard Extension} & \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{00010} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & LR.W \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{00011} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & SC.W \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{00001} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOSWAP.W \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{00000} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOADD.W \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{00100} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOXOR.W \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{01100} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & 
+\multicolumn{1}{c|}{0101111} & AMOAND.W \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{01000} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOOR.W \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{10000} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOMIN.W \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{10100} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOMAX.W \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{11000} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOMINU.W \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{11100} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOMAXU.W \\ +\cline{2-11} + + +\end{tabular} +\end{center} +\end{small} + +\label{instr-table} +\end{table} + + +\newpage + +\begin{table}[p] +\begin{small} +\begin{center} +\begin{tabular}{p{0in}p{0.4in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.4in}p{0.6in}p{0.4in}p{0.6in}p{0.7in}l} +& & & & & & & & & & \\ + & +\multicolumn{1}{l}{\instbit{31}} & +\multicolumn{1}{r}{\instbit{27}} & +\instbit{26} & +\instbit{25} & +\multicolumn{1}{l}{\instbit{24}} & +\multicolumn{1}{r}{\instbit{20}} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{funct7} & 
+\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & R-type \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{rs3} & +\multicolumn{2}{c|}{funct2} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & R4-type \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & I-type \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[11:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{opcode} & S-type \\ +\cline{2-11} + + +& +\multicolumn{10}{c}{} & \\ +& +\multicolumn{10}{c}{\bf RV64A Standard Extension (in addition to RV32A)} & \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{00010} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & LR.D \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{00011} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & SC.D \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{00001} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOSWAP.D \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{00000} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOADD.D \\ +\cline{2-11} + + +& 
+\multicolumn{2}{|c|}{00100} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOXOR.D \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{01100} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOAND.D \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{01000} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOOR.D \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{10000} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOMIN.D \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{10100} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOMAX.D \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{11000} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOMINU.D \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{11100} & +\multicolumn{1}{c|}{aq} & +\multicolumn{1}{c|}{rl} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0101111} & AMOMAXU.D \\ +\cline{2-11} + + +& +\multicolumn{10}{c}{} & \\ +& +\multicolumn{10}{c}{\bf RV32F Standard Extension} & \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & 
+\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0000111} & FLW \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[11:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{0100111} & FSW \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{rs3} & +\multicolumn{2}{c|}{00} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1000011} & FMADD.S \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{rs3} & +\multicolumn{2}{c|}{00} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1000111} & FMSUB.S \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{rs3} & +\multicolumn{2}{c|}{00} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1001011} & FNMSUB.S \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{rs3} & +\multicolumn{2}{c|}{00} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1001111} & FNMADD.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FADD.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000100} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FSUB.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0001000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FMUL.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0001100} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & 
+\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FDIV.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0101100} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FSQRT.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0010000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FSGNJ.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0010000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FSGNJN.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0010000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FSGNJX.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0010100} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FMIN.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0010100} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FMAX.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1100000} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.W.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1100000} & +\multicolumn{2}{c|}{00001} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.WU.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1110000} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FMV.X.S \\ +\cline{2-11} + + +& 
+\multicolumn{4}{|c|}{1010000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FEQ.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1010000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FLT.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1010000} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FLE.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1110000} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCLASS.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1101000} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.S.W \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1101000} & +\multicolumn{2}{c|}{00001} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.S.WU \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1111000} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FMV.S.X \\ +\cline{2-11} + + +\end{tabular} +\end{center} +\end{small} + +\label{instr-table} +\end{table} + + +\newpage + +\begin{table}[p] +\begin{small} +\begin{center} +\begin{tabular}{p{0in}p{0.4in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.4in}p{0.6in}p{0.4in}p{0.6in}p{0.7in}l} +& & & & & & & & & & \\ + & +\multicolumn{1}{l}{\instbit{31}} & +\multicolumn{1}{r}{\instbit{27}} & +\instbit{26} & +\instbit{25} & +\multicolumn{1}{l}{\instbit{24}} & +\multicolumn{1}{r}{\instbit{20}} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & 
+\instbitrange{6}{0} \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{funct7} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & R-type \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{rs3} & +\multicolumn{2}{c|}{funct2} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & R4-type \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & I-type \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[11:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{opcode} & S-type \\ +\cline{2-11} + + +& +\multicolumn{10}{c}{} & \\ +& +\multicolumn{10}{c}{\bf RV64F Standard Extension (in addition to RV32F)} & \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1100000} & +\multicolumn{2}{c|}{00010} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.L.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1100000} & +\multicolumn{2}{c|}{00011} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.LU.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1101000} & +\multicolumn{2}{c|}{00010} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.S.L \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1101000} & +\multicolumn{2}{c|}{00011} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.S.LU \\ +\cline{2-11} + + +& +\multicolumn{10}{c}{} & \\ +& +\multicolumn{10}{c}{\bf RV32D Standard Extension} & \\ +\cline{2-11} + + +& 
+\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{0000111} & FLD \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{imm[11:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{011} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{0100111} & FSD \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{rs3} & +\multicolumn{2}{c|}{01} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1000011} & FMADD.D \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{rs3} & +\multicolumn{2}{c|}{01} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1000111} & FMSUB.D \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{rs3} & +\multicolumn{2}{c|}{01} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1001011} & FNMSUB.D \\ +\cline{2-11} + + +& +\multicolumn{2}{|c|}{rs3} & +\multicolumn{2}{c|}{01} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1001111} & FNMADD.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FADD.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0000101} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FSUB.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0001001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FMUL.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0001101} & +\multicolumn{2}{c|}{rs2} & 
+\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FDIV.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0101101} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FSQRT.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0010001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FSGNJ.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0010001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FSGNJN.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0010001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FSGNJX.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0010101} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FMIN.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0010101} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FMAX.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0100000} & +\multicolumn{2}{c|}{00001} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.S.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{0100001} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.D.S \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1010001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{010} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FEQ.D \\ 
+\cline{2-11} + + +& +\multicolumn{4}{|c|}{1010001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FLT.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1010001} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FLE.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1110001} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{001} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCLASS.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1100001} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.W.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1100001} & +\multicolumn{2}{c|}{00001} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.WU.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1101001} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.D.W \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1101001} & +\multicolumn{2}{c|}{00001} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.D.WU \\ +\cline{2-11} + + +& +\multicolumn{10}{c}{} & \\ +& +\multicolumn{10}{c}{\bf RV64D Standard Extension (in addition to RV32D)} & \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1100001} & +\multicolumn{2}{c|}{00010} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.L.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1100001} & +\multicolumn{2}{c|}{00011} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & 
+\multicolumn{1}{c|}{1010011} & FCVT.LU.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1110001} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FMV.X.D \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1101001} & +\multicolumn{2}{c|}{00010} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.D.L \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1101001} & +\multicolumn{2}{c|}{00011} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FCVT.D.LU \\ +\cline{2-11} + + +& +\multicolumn{4}{|c|}{1111001} & +\multicolumn{2}{c|}{00000} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{1010011} & FMV.D.X \\ +\cline{2-11} + + +\end{tabular} +\end{center} +\end{small} +\caption{Instruction listing for RISC-V} +\label{instr-table} +\end{table} + diff --git a/src/intro.tex b/src/intro.tex new file mode 100644 index 0000000..f10f975 --- /dev/null +++ b/src/intro.tex @@ -0,0 +1,502 @@ +\chapter{Introduction} + +RISC-V (pronounced ``risk-five'') is a new instruction set +architecture (ISA) that was originally designed to support computer +architecture research and education, but which we now hope will also +become a standard free and open architecture for industry +implementations. Our goals in defining RISC-V include: +\vspace{-0.1in} +\begin{itemize} +\parskip 0pt +\itemsep 1pt +\item A completely {\em open} ISA that is freely available to + academia and industry. +\item A {\em real} ISA suitable for direct native hardware implementation, + not just simulation or binary translation. 
+\item An ISA that avoids ``over-architecting'' for a particular + microarchitecture style (e.g., microcoded, in-order, decoupled, + out-of-order) or implementation technology (e.g., full-custom, ASIC, + FPGA), but which allows efficient implementation in any of these. +\item An ISA separated into a {\em small} base integer ISA, usable by + itself as a base for customized accelerators or for educational + purposes, and optional standard extensions, to support + general-purpose software development. +\item Support for the revised 2008 IEEE-754 floating-point standard~\cite{ieee754-2008}. +\item An ISA supporting extensive user-level ISA extensions and + specialized variants. +\item Both 32-bit and 64-bit address space variants for + applications, operating system kernels, and hardware implementations. +\item An ISA with support for highly-parallel multicore + or manycore implementations, including heterogeneous multiprocessors. +\item Optional {\em variable-length instructions} to both expand available + instruction encoding space and to support an optional {\em dense + instruction encoding} for improved performance, static code size, + and energy efficiency. +\item A fully virtualizable ISA to ease hypervisor development. +\item An ISA that simplifies experiments with new supervisor-level and + hypervisor-level ISA designs. +\end{itemize} +\vspace{-0.1in} + +\begin{commentary} +Commentary on our design decisions is formatted as in this paragraph, +and can be skipped if the reader is only interested in the +specification itself. +\end{commentary} +\begin{commentary} +The name RISC-V was chosen to represent the fifth major RISC ISA +design from UC Berkeley (RISC-I~\cite{riscI-isca1981}, +RISC-II~\cite{Katevenis:1983}, SOAR~\cite{Ungar:1984}, and +SPUR~\cite{spur-jsscc1989} were the first four). 
We also pun on the +use of the Roman numeral ``V'' to signify ``variations'' and +``vectors'', as support for a range of architecture research, +including various data-parallel accelerators, is an explicit goal of +the ISA design. +\end{commentary} + +\begin{commentary} +We developed RISC-V to support our own needs in research and +education, where our group is particularly interested in actual +hardware implementations of research ideas (we have completed eleven +different silicon fabrications of RISC-V since the first edition of +this specification), and in providing real implementations for +students to explore in classes (RISC-V processor RTL designs have been +used in multiple undergraduate and graduate classes at Berkeley). In +our current research, we are especially interested in the move towards +specialized and heterogeneous accelerators, driven by the power +constraints imposed by the end of conventional transistor scaling. We +wanted a highly flexible and extensible base ISA around which to build +our research effort. + +A question we have been repeatedly asked is ``Why develop a new ISA?'' +The biggest obvious benefit of using an existing commercial ISA is the +large and widely supported software ecosystem, both development tools +and ported applications, which can be leveraged in research and +teaching. Other benefits include the existence of large amounts of +documentation and tutorial examples. However, our experience of using +commercial instruction sets for research and teaching is that these +benefits are smaller in practice, and do not outweigh the +disadvantages: + +\begin{itemize} +\item {\bf Commercial ISAs are proprietary.} Except for SPARC V8, + which is an open IEEE standard~\cite{sparcieee1994}, most owners of + commercial ISAs carefully guard their intellectual property and do + not welcome freely available competitive implementations. 
This is + much less of an issue for academic research and teaching using only + software simulators, but has been a major concern for groups wishing + to share actual RTL implementations. It is also a major concern for + entities who do not want to trust the few sources of commercial ISA + implementations, but who are prohibited from creating their own + clean room implementations. We cannot guarantee that all RISC-V + implementations will be free of third-party patent infringements, + but we can guarantee we will not attempt to sue a RISC-V + implementor. + +\item {\bf Commercial ISAs are only popular in certain market + domains.} The most obvious examples at time of writing are that + the ARM architecture is not well supported in the server space, and + the Intel x86 architecture (or for that matter, almost every other + architecture) is not well supported in the mobile space, though both + Intel and ARM are attempting to enter each other's market segments. + Another example is ARC and Tensilica, which provide extensible cores + but are focused on the embedded space. This market segmentation + dilutes the benefit of supporting a particular commercial ISA as in + practice the software ecosystem only exists for certain domains, and + has to be built for others. + +\item {\bf Commercial ISAs come and go.} Previous research + infrastructures have been built around commercial ISAs that are no + longer popular (SPARC, MIPS) or even no longer in production + (Alpha). These lose the benefit of an active software ecosystem, + and the lingering intellectual property issues around the ISA and + supporting tools interfere with the ability of interested third + parties to continue supporting the ISA. An open ISA might also lose + popularity, but any interested party can continue using and + developing the ecosystem. 
+ +\item {\bf Popular commercial ISAs are complex.} The dominant + commercial ISAs (x86 and ARM) are both very complex to implement in + hardware to the level of supporting common software stacks and + operating systems. Worse, nearly all the complexity is due to bad, + or at least outdated, ISA design decisions rather than features that + truly improve efficiency. + +\item {\bf Commercial ISAs alone are not enough to bring up + applications.} Even if we expend the effort to implement a + commercial ISA, this is not enough to run existing applications for + that ISA. Most applications need a complete ABI (application binary + interface) to run, not just the user-level ISA. Most ABIs rely on + libraries, which in turn rely on operating system support. To run an + existing operating system requires implementing the supervisor-level + ISA and device interfaces expected by the OS. These are usually + much less well-specified and considerably more complex to + implement than the user-level ISA. + +\item {\bf Popular commercial ISAs were not designed for extensibility.} The + dominant commercial ISAs were not particularly designed for + extensibility, and as a consequence have added considerable + instruction encoding complexity as their instruction sets have + grown. Companies such as Tensilica (acquired by Cadence) and ARC + (acquired by Synopsys) have built ISAs and toolchains around + extensibility, but have focused on embedded applications rather than + general-purpose computing systems. + +\item {\bf A modified commercial ISA is a new ISA.} One of our main + goals is to support architecture research, including major ISA + extensions. Even small extensions diminish the benefit of using a + standard ISA, as compilers have to be modified and applications + rebuilt from source code to use the extension. Larger extensions + that introduce new architectural state also require modifications to + the operating system. 
Ultimately, the modified commercial ISA + becomes a new ISA, but carries along all the legacy baggage of the + base ISA. +\end{itemize} + +Our position is that the ISA is perhaps the most important interface +in a computing system, and there is no reason that such an important +interface should be proprietary. The dominant commercial ISAs are +based on instruction set concepts that were already well known over 30 +years ago. Software developers should be able to target an open +standard hardware target, and commercial processor designers should +compete on implementation quality. + +We are far from the first to contemplate an open ISA design suitable +for hardware implementation. We also considered other existing open +ISA designs, of which the closest to our goals was the OpenRISC +architecture~\cite{openriscarch}. We decided against adopting the +OpenRISC ISA for several technical reasons: + +\begin{itemize} +\item OpenRISC has condition codes and branch delay slots, which + complicate higher performance implementations. +\item OpenRISC uses a fixed 32-bit encoding and 16-bit immediates, + which precludes a denser instruction encoding and limits space for + later expansion of the ISA. +\item OpenRISC does not support the 2008 revision to the IEEE 754 + floating-point standard. +\item The OpenRISC 64-bit design had not been completed when we began. +\end{itemize} + +By starting from a clean slate, we could design an ISA that met all of +our goals, though of course, this took far more effort than we had +planned at the outset. We have now invested considerable effort in +building up the RISC-V ISA infrastructure, including documentation, +compiler tool chains, operating system ports, reference ISA +simulators, FPGA implementations, efficient ASIC implementations, +architecture test suites, and teaching materials. 
Since the last +edition of this manual, there has been considerable uptake of the +RISC-V ISA in both academia and industry, and we have created the +non-profit RISC-V Foundation to protect and promote the standard. The +RISC-V Foundation website at \url{http://riscv.org} contains the latest +information on the Foundation membership and various open-source +projects using RISC-V. +\end{commentary} + +The RISC-V manual is structured in two volumes. This volume covers +the user-level ISA design, including optional ISA extensions. The +second volume provides the privileged architecture. + +\begin{commentary} +In this user-level manual, we aim to remove any dependence on +particular microarchitectural features or on privileged architecture +details. This is both for clarity and to allow maximum flexibility +for alternative implementations. +\end{commentary} + +\section{RISC-V ISA Overview} + +The RISC-V ISA is defined as a base integer ISA, which must be present +in any implementation, plus optional extensions to the base ISA. The +base integer ISA is very similar to that of the early RISC processors +except with no branch delay slots and with support for optional +variable-length instruction encodings. The base is carefully +restricted to a minimal set of instructions sufficient to provide a +reasonable target for compilers, assemblers, linkers, and operating +systems (with additional supervisor-level operations), and so provides +a convenient ISA and software toolchain ``skeleton'' around which more +customized processor ISAs can be built. + +Each base integer instruction set is characterized by the width of the +integer registers and the corresponding size of the user address +space. There are two primary base integer variants, RV32I and RV64I, +described in Chapters~\ref{rv32} and \ref{rv64}, which provide 32-bit +or 64-bit user-level address spaces respectively. 
Hardware +implementations and operating systems might provide only one or both +of RV32I and RV64I for user programs. Chapter~\ref{rv32e} describes +the RV32E subset variant of the RV32I base instruction set, which has +been added to support small microcontrollers. Chapter~\ref{rv128} +describes a future RV128I variant of the base integer instruction set +supporting a flat 128-bit user address space. + +\begin{commentary} +Although 64-bit address spaces are a requirement for larger systems, +we believe 32-bit address spaces will remain adequate for many +embedded and client devices for decades to come and will be desirable +to lower memory traffic and energy consumption. In addition, 32-bit +address spaces are sufficient for educational purposes. A larger flat +128-bit address space might eventually be required, so we ensured this +could be accommodated within the RISC-V ISA framework. +\end{commentary} + +The base integer ISA may be subset by a hardware implementation, but +opcode traps and software emulation by a more privileged layer must +then be used to implement functionality not provided by hardware. + +\begin{commentary} +Subsets of the base integer ISA might be useful for pedagogical +purposes, but the base has been defined such that there should be +little incentive to subset a real hardware implementation beyond +omitting support for misaligned memory accesses and treating all SYSTEM +instructions as a single trap. +\end{commentary} + +RISC-V has been designed to support extensive customization and +specialization. The base integer ISA can be extended with one or more +optional instruction-set extensions, but the base integer instructions +cannot be redefined. We divide RISC-V instruction-set extensions +into {\em standard} and {\em non-standard} extensions. Standard +extensions should be generally useful and should not conflict with +other standard extensions. 
Non-standard extensions may be highly +specialized, or may conflict with other standard or non-standard +extensions. Instruction-set extensions may provide slightly different +functionality depending on the width of the base integer instruction +set. Chapter~\ref{extensions} describes various ways of extending the +RISC-V ISA. We have also developed a naming convention for RISC-V +base instructions and instruction-set extensions, described in detail +in Chapter~\ref{naming}. + +To support more general software development, a set of standard +extensions are defined to provide integer multiply/divide, atomic +operations, and single and double-precision floating-point arithmetic. +The base integer ISA is named ``I'' (prefixed by RV32 or RV64 +depending on integer register width), and contains integer +computational instructions, integer loads, integer stores, and +control-flow instructions, and is mandatory for all RISC-V +implementations. The standard integer multiplication and division +extension is named ``M'', and adds instructions to multiply and divide +values held in the integer registers. The standard atomic instruction +extension, denoted by ``A'', adds instructions that atomically read, +modify, and write memory for inter-processor synchronization. The +standard single-precision floating-point extension, denoted by ``F'', +adds floating-point registers, single-precision computational +instructions, and single-precision loads and stores. The standard +double-precision floating-point extension, denoted by ``D'', expands +the floating-point registers, and adds double-precision computational +instructions, loads, and stores. An integer base plus these four +standard extensions (``IMAFD'') is given the abbreviation ``G'' and +provides a general-purpose scalar instruction set. RV32G and RV64G +are currently the default target of our compiler toolchains. Later +chapters describe these and other planned standard RISC-V extensions. 
+ +Beyond the base integer ISA and the standard extensions, it is rare +that a new instruction will provide a significant benefit for all +applications, although it may be very beneficial for a certain domain. +As energy efficiency concerns are forcing greater specialization, we +believe it is important to simplify the required portion of an ISA +specification. Whereas other architectures usually treat their ISA as +a single entity, which changes to a new version as instructions are +added over time, RISC-V will endeavor to keep the base and each +standard extension constant over time, and instead layer new +instructions as further optional extensions. For example, the base +integer ISAs will continue as fully supported standalone ISAs, +regardless of any subsequent extensions. +\begin{commentary} +With the 2.0 release of the user ISA specification, we intend the +``RV32IMAFD'' and ``RV64IMAFD'' base and standard extensions +(also known as ``RV32G'' and ``RV64G'') to remain constant for future +development. +\end{commentary} + +\section{Instruction Length Encoding} + +The base RISC-V ISA has fixed-length 32-bit instructions that must be +naturally aligned on 32-bit boundaries. However, the standard RISC-V +encoding scheme is designed to support ISA extensions with +variable-length instructions, where each instruction can be any number +of 16-bit instruction {\em parcels} in length and parcels are +naturally aligned on 16-bit boundaries. The standard compressed ISA +extension described in Chapter~\ref{compressed} reduces code size by +providing compressed 16-bit instructions and relaxes the alignment +constraints to allow all instructions (16 bit and 32 bit) to be +aligned on any 16-bit boundary to improve code density. + +Figure~\ref{instlengthcode} illustrates the standard RISC-V +instruction-length encoding convention. All the 32-bit instructions +in the base ISA have their lowest two bits set to {\tt 11}. 
The +optional compressed 16-bit instruction-set extensions have their +lowest two bits equal to {\tt 00}, {\tt 01}, or {\tt 10}. Standard +instruction-set extensions encoded with more than 32 bits have +additional low-order bits set to {\tt 1}, with the conventions for +48-bit and 64-bit lengths shown in Figure~\ref{instlengthcode}. +Instruction lengths between 80 bits and 176 bits are encoded using a +3-bit field in bits [14:12] giving the number of 16-bit words in +addition to the first 5$\times$16-bit words. The encoding with bits +[14:12] set to {\tt 111} is reserved for future longer instruction +encodings. + +\begin{figure}[hb] +{ +\begin{center} +\begin{tabular}{ccccl} +\cline{4-4} +& & & \multicolumn{1}{|c|}{\tt xxxxxxxxxxxxxxaa} & 16-bit ({\tt aa} +$\neq$ {\tt 11})\\ +\cline{4-4} +\\ +\cline{3-4} +& & \multicolumn{1}{|c|}{\tt xxxxxxxxxxxxxxxx} +& \multicolumn{1}{c|}{\tt xxxxxxxxxxxbbb11} & 32-bit ({\tt bbb} +$\neq$ {\tt 111}) \\ +\cline{3-4} +\\ +\cline{2-4} +\hspace{0.1in} +& \multicolumn{1}{c|}{$\cdot\cdot\cdot${\tt xxxx} } +& \multicolumn{1}{c|}{\tt xxxxxxxxxxxxxxxx} +& \multicolumn{1}{c|}{\tt xxxxxxxxxx011111} & 48-bit \\ +\cline{2-4} +\\ +\cline{2-4} +\hspace{0.1in} +& \multicolumn{1}{c|}{$\cdot\cdot\cdot${\tt xxxx} } +& \multicolumn{1}{c|}{\tt xxxxxxxxxxxxxxxx} +& \multicolumn{1}{c|}{\tt xxxxxxxxx0111111} & 64-bit \\ +\cline{2-4} +\\ +\cline{2-4} +\hspace{0.1in} +& \multicolumn{1}{c|}{$\cdot\cdot\cdot${\tt xxxx} } +& \multicolumn{1}{c|}{\tt xxxxxxxxxxxxxxxx} +& \multicolumn{1}{c|}{\tt xnnnxxxxx1111111} & (80+16*{\tt nnn})-bit, + {\tt nnn}$\neq${\tt 111} \\ +\cline{2-4} +\\ +\cline{2-4} +\hspace{0.1in} +& \multicolumn{1}{c|}{$\cdot\cdot\cdot${\tt xxxx} } +& \multicolumn{1}{c|}{\tt xxxxxxxxxxxxxxxx} +& \multicolumn{1}{c|}{\tt x111xxxxx1111111} & Reserved for $\geq$192-bits \\ +\cline{2-4} +\\ +Byte Address: & \multicolumn{1}{r}{base+4} & \multicolumn{1}{r}{base+2} & \multicolumn{1}{r}{base} & \\ + \end{tabular} +\end{center} +} +\caption{RISC-V 
instruction length encoding.} +\label{instlengthcode} +\end{figure} + +\begin{commentary} +Given the code size and energy savings of a compressed format, we +wanted to build in support for a compressed format to the ISA encoding +scheme rather than adding this as an afterthought, but to allow +simpler implementations we didn't want to make the compressed format +mandatory. We also wanted to optionally allow longer instructions to +support experimentation and larger instruction-set extensions. +Although our encoding convention required a tighter encoding of the +core RISC-V ISA, this has several beneficial effects. + +An implementation of the standard G ISA need only hold the +most-significant 30 bits in instruction caches (a 6.25\% saving). On +instruction cache refills, any instructions encountered with either +low bit clear should be recoded into illegal 30-bit instructions +before storing in the cache to preserve illegal instruction exception +behavior. + +Perhaps more importantly, by condensing our base ISA into a subset of +the 32-bit instruction word, we leave more space available for custom +extensions. In particular, the base RV32I ISA uses less than 1/8 of +the encoding space in the 32-bit instruction word. As described in +Chapter~\ref{extensions}, an implementation that does not require +support for the standard compressed instruction extension can map 3 +additional 30-bit instruction spaces into the 32-bit fixed-width +format, while preserving support for standard $\geq$32-bit +instruction-set extensions. Further, if the implementation also does +not need instructions $>$32-bits in length, it can recover a further +four major opcodes. +\end{commentary} +\begin{commentary} +We consider it a feature that any length of instruction containing all +zero bits is not legal, as this quickly traps erroneous jumps into +zeroed memory regions. 
Similarly, we also reserve the instruction +encoding containing all ones to be an illegal instruction, to catch +the other common pattern observed with unprogrammed non-volatile +memory devices, disconnected memory buses, or broken memory devices. +\end{commentary} + +The base RISC-V ISA has a little-endian memory system, but +non-standard variants can provide a big-endian or bi-endian memory +system. Instructions are stored in memory with each 16-bit parcel +stored in a memory halfword according to the implementation's natural +endianness. Parcels comprising one instruction are stored at +increasing halfword addresses, with the lowest addressed parcel +holding the lowest numbered bits in the instruction specification, +i.e., instructions are always stored in a little-endian sequence of +parcels regardless of the memory system endianness. The code sequence +in Figure~\ref{fig:storeinstruction} will store a 32-bit instruction +to memory correctly regardless of memory system endianness. + +\begin{figure}[ht] +\begin{verbatim} + // Store 32-bit instruction in x2 register to location pointed to by x3. + sh x2, 0(x3) // Store low bits of instruction in first parcel. + srli x2, x2, 16 // Move high bits down to low bits, overwriting x2. + sh x2, 2(x3) // Store high bits in second parcel. +\end{verbatim} +\caption{Recommended code sequence to store 32-bit instruction from register to + memory. Operates correctly on both big- and little-endian + memory systems and avoids misaligned accesses when used with variable-length + instruction-set extensions.} +\label{fig:storeinstruction} +\end{figure} + +\begin{commentary} +We chose little-endian byte ordering for the RISC-V memory system +because little-endian systems are currently dominant commercially (all +x86 systems; iOS, Android, and Windows for ARM). A minor point is +that we have also found little-endian memory systems to be more +natural for hardware designers. 
However, certain application areas, +such as IP networking, operate on big-endian data structures, and so +we leave open the possibility of non-standard big-endian or bi-endian +systems. + +We have to fix the order in which instruction parcels are stored in +memory, independent of memory system endianness, to ensure that the +length-encoding bits always appear first in halfword address +order. This allows the length of a variable-length instruction to be +quickly determined by an instruction fetch unit by examining only the +first few bits of the first 16-bit instruction parcel. Once we had +decided to fix on a little-endian memory system and instruction parcel +ordering, this naturally led to placing the length-encoding bits in +the LSB positions of the instruction format to avoid breaking up +opcode fields. +\end{commentary} + +\section{Exceptions, Traps, and Interrupts} + +We use the term {\em exception} to refer to an unusual condition +occurring at run time associated with an instruction in the current +RISC-V thread. We use the term {\em trap} to refer to the synchronous +transfer of control to a trap handler caused by an exceptional +condition occurring within a RISC-V thread. Trap handlers usually +execute in a more privileged environment. + +We use the term {\em interrupt} to refer to an external event that +occurs asynchronously to the current RISC-V thread. When an interrupt +that must be serviced occurs, some instruction is selected to receive +an interrupt exception and subsequently experiences a trap. + +The instruction descriptions in following chapters describe conditions +that raise an exception during execution. Whether and how these are +converted into traps is dependent on the execution environment, though +the expectation is that most environments will take a {\em precise} +trap when an exception is signaled (except for floating-point +exceptions, which, in the standard floating-point extensions, do not +cause traps). 
+ +\begin{commentary} +Our use of ``exception'' and ``trap'' matches that in the IEEE-754 +floating-point standard. +\end{commentary} + diff --git a/src/l.tex b/src/l.tex new file mode 100644 index 0000000..dcc6c75 --- /dev/null +++ b/src/l.tex @@ -0,0 +1,17 @@ +\chapter{``L'' Standard Extension for Decimal Floating-Point, Version 0.0} + +This chapter is a placeholder for the specification of a standard +extension named ``L'' designed to support decimal floating-point +arithmetic as defined in the IEEE 754-2008 standard. + +\section{Decimal Floating-Point Registers} + +Existing floating-point registers are used to hold 64-bit and 128-bit +decimal floating-point values, and the existing floating-point load +and store instructions are used to move values to and from memory. + +\begin{commentary} +Due to the large opcode space required by the fused multiply-add +instructions, the decimal floating-point instruction extension will +require five 25-bit major opcodes in a 30-bit encoding space. +\end{commentary} diff --git a/src/m.tex b/src/m.tex new file mode 100644 index 0000000..cc289e8 --- /dev/null +++ b/src/m.tex @@ -0,0 +1,139 @@ +\chapter{``M'' Standard Extension for Integer Multiplication and + Division, Version 2.0} + +This chapter describes the standard integer multiplication and +division instruction extension, which is named ``M'' and contains +instructions that multiply or divide values held in two integer +registers. + +\begin{commentary} +We separate integer multiply and divide out from the base to simplify +low-end implementations, or for applications where integer multiply +and divide operations are either infrequent or better handled in +attached accelerators. 
+\end{commentary} + +\section{Multiplication Operations} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{S@{}R@{}R@{}S@{}R@{}O} +\\ +\instbitrange{31}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct7} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +7 & 5 & 5 & 3 & 5 & 7 \\ +MULDIV & multiplier & multiplicand & MUL/MULH[[S]U] & dest & OP \\ +MULDIV & multiplier & multiplicand & MULW & dest & OP-32 \\ +\end{tabular} +\end{center} + +MUL performs an XLEN-bit$\times$XLEN-bit multiplication and places the +lower XLEN bits in the destination register. MULH, MULHU, and MULHSU +perform the same multiplication but return the upper XLEN bits of the +full 2$\times$XLEN-bit product, for signed$\times$signed, +unsigned$\times$unsigned, and signed$\times$unsigned multiplication +respectively. If both the high and low bits of the same product are +required, then the recommended code sequence is: MULH[[S]U] {\em rdh, + rs1, rs2}; MUL {\em rdl, rs1, rs2} (source register specifiers must +be in same order and {\em rdh} cannot be the same as {\em rs1} or {\em + rs2}). Microarchitectures can then fuse these into a single +multiply operation instead of performing two separate multiplies. + +MULW is only valid for RV64, and multiplies the lower +32 bits of the source registers, placing the sign-extension of the +lower 32 bits of the result into the destination register. MUL can be +used to obtain the upper 32 bits of the 64-bit product, but signed +arguments must be proper 32-bit signed values, whereas unsigned +arguments must have their upper 32 bits clear. 
+ +\section{Division Operations} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{S@{}R@{}R@{}O@{}R@{}O} +\\ +\instbitrange{31}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct7} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +7 & 5 & 5 & 3 & 5 & 7 \\ +MULDIV & divisor & dividend & DIV[U]/REM[U] & dest & OP \\ +MULDIV & divisor & dividend & DIV[U]W/REM[U]W & dest & OP-32 \\ +\end{tabular} +\end{center} + +DIV and DIVU perform signed and unsigned integer division of XLEN +bits by XLEN bits. REM and REMU provide the remainder of the +corresponding division operation. If both the quotient and remainder +are required from the same division, the recommended code sequence is: +DIV[U] {\em rdq, rs1, rs2}; REM[U] {\em rdr, rs1, rs2} ({\em rdq} +cannot be the same as {\em rs1} or {\em rs2}). Microarchitectures can +then fuse these into a single divide operation instead of performing +two separate divides. + +DIVW and DIVUW instructions are only valid for RV64, and divide the +lower 32 bits of {\em rs1} by the lower 32 bits of {\em rs2}, treating +them as signed and unsigned integers respectively, placing the 32-bit +quotient in {\em rd}, sign-extended to 64 bits. REMW and REMUW +instructions are only valid for RV64, and provide the corresponding +signed and unsigned remainder operations respectively. Both REMW and +REMUW sign-extend the 32-bit result to 64 bits. + +The semantics for division by zero and division overflow are summarized in +Table~\ref{tab:divby0}. The quotient of division by zero has all bits set, +i.e. $2^{XLEN}-1$ for unsigned division or $-1$ for signed division. The +remainder of division by zero equals the dividend. Signed division overflow +occurs only when the most-negative integer, $-2^{XLEN-1}$, is divided by $-1$. 
+The quotient of signed division overflow is equal to the dividend, and the +remainder is zero. Unsigned division overflow cannot occur. + +\vspace{0.1in} +\begin{table}[h] +\center +\begin{tabular}{|l|c|c||c|c|c|c|} +\hline +Condition & Dividend & Divisor & DIVU & REMU & DIV & REM \\ \hline +Division by zero & $x$ & 0 & $2^{XLEN}-1$ & $x$ & $-1$ & $x$ \\ +Overflow (signed only) & $-2^{XLEN-1}$ & $-1$ & -- & -- & $-2^{XLEN-1}$ & 0 \\ +\hline +\end{tabular} +\caption{Semantics for division by zero and division overflow.} +\label{tab:divby0} +\end{table} + +\begin{commentary} +We considered raising exceptions on integer divide by zero, with these +exceptions causing a trap in most execution environments. However, +this would be the only arithmetic trap in the standard ISA +(floating-point exceptions set flags and write default values, but do +not cause traps) and would require language implementors to interact +with the execution environment's trap handlers for this case. +Further, where language standards mandate that a divide-by-zero +exception must cause an immediate control flow change, only a single +branch instruction needs to be added to each divide operation, and +this branch instruction can be inserted after the divide and should +normally be very predictably not taken, adding little runtime +overhead. +\end{commentary} diff --git a/src/machine.tex b/src/machine.tex new file mode 100644 index 0000000..1e94a57 --- /dev/null +++ b/src/machine.tex @@ -0,0 +1,2312 @@ +\chapter{Machine-Level ISA} +\label{machine} + +This chapter describes the machine-level operations available in +machine-mode (M-mode), which is the highest privilege mode in a RISC-V +system. M-mode is the only mandatory privilege mode in a RISC-V +hardware implementation. M-mode is used for low-level access to a +hardware platform and is the first mode entered at reset. M-mode can +also be used to implement features that are too difficult or expensive +to implement in hardware directly. 
The RISC-V machine-level ISA +contains a common core that is extended depending on which other +privilege levels are supported and other details of the hardware +implementation. + +\section{Machine-Level CSRs} + +In addition to the machine-level CSRs described in this section, +M-mode code can access all CSRs at lower privilege levels. + +\subsection{Machine ISA Register {\tt misa}} + +The {\tt misa} register is an XLEN-bit \warl\ read-write register +reporting the ISA supported by the hart. This register must be +readable in any implementation, but a value of zero can be returned to +indicate the {\tt misa} register has not been implemented, requiring +that CPU capabilities be determined through a separate non-standard +mechanism. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{c@{}c@{}J} +\instbitrange{XLEN-1}{XLEN-2} & +\instbitrange{XLEN-3}{26} & +\instbitrange{25}{0} \\ +\hline +\multicolumn{1}{|c|}{Base (\warl)} & +\multicolumn{1}{c|}{\wiri} & +\multicolumn{1}{c|}{Extensions (\warl)} \\ +\hline +2 & XLEN-28 & 26 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine ISA register ({\tt misa}).} +\label{misareg} +\end{figure*} + +The Base field encodes the native base integer ISA width as shown in +Table~\ref{misabase}. The Base field may be writable in +implementations that support multiple base ISA widths. The Base field +is always set to the widest supported ISA variant at reset. + +\begin{table*}[h!] +\begin{center} +\begin{tabular}{|r|l|} +\hline +Value & Description \\ +\hline +1 & 32 \\ +2 & 64 \\ +3 & 128 \\ +\hline +\end{tabular} +\end{center} +\caption{Encoding of Base field in {\tt misa}} +\label{misabase} +\end{table*} + +\begin{commentary} +The base can be quickly ascertained using branches on the sign of the +returned {\tt misa} value, and possibly a shift left by one and a +second branch on the sign. These checks can be written in assembly +code without knowing the register width (XLEN) of the machine. 
+The base width is given by $XLEN=2^{Base+4}$. +\end{commentary} + +The Extensions field encodes the presence of the standard extensions, +with a single bit per letter of the alphabet (bit 0 encodes presence +of extension ``A'', bit 1 encodes presence of extension ``B'', +through to bit 25 which encodes ``Z''). The ``I'' bit will be set for +RV32I, RV64I, and RV128I base ISAs, and the ``E'' bit will be set for +RV32E. The Extensions field is a \warl\ field that can contain writable bits +where the implementation allows the supported ISA to be modified. At +reset, the Extensions field should contain the maximal set of supported +extensions, and I should be selected over E if both are available. + +The ``G'' bit is used as an escape to allow expansion to a larger +space of standard extension names. +\begin{commentary} +G is used to indicate the combination IMAFD, so is redundant in the +{\tt misa} register, hence we reserve the bit to indicate that +additional standard extensions are present. +\end{commentary} + +The ``U'', ``S'', and ``H'' bits will be set if there is support for +user, supervisor, and hypervisor privilege modes respectively. + +The ``X'' bit will be set if there are any non-standard extensions. 
+ +\begin{table*} +\begin{center} +\begin{tabular}{|r|r|l|} +\hline +Bit & Character & Description \\ +\hline + 0 & A & Atomic extension \\ + 1 & B & {\em Tentatively reserved for Bit operations extension} \\ + 2 & C & Compressed extension \\ + 3 & D & Double-precision floating-point extension \\ + 4 & E & RV32E base ISA \\ + 5 & F & Single-precision floating-point extension \\ + 6 & G & Additional standard extensions present \\ + 7 & H & Hypervisor mode implemented \\ + 8 & I & RV32I/64I/128I base ISA \\ + 9 & J & {\em Reserved} \\ + 10 & K & {\em Reserved} \\ + 11 & L & {\em Tentatively reserved for Decimal Floating-Point extension} \\ + 12 & M & Integer Multiply/Divide extension \\ + 13 & N & User-level interrupts supported \\ + 14 & O & {\em Reserved} \\ + 15 & P & {\em Tentatively reserved for Packed-SIMD extension} \\ + 16 & Q & Quad-precision floating-point extension \\ + 17 & R & {\em Reserved} \\ + 18 & S & Supervisor mode implemented \\ + 19 & T & {\em Tentatively reserved for Transactional Memory extension} \\ + 20 & U & User mode implemented \\ + 21 & V & {\em Tentatively reserved for Vector extension} \\ + 22 & W & {\em Reserved} \\ + 23 & X & Non-standard extensions present \\ + 24 & Y & {\em Reserved} \\ + 25 & Z & {\em Reserved} \\ +\hline +\end{tabular} +\end{center} +\caption{Encoding of Extensions field in {\tt misa}. All bits that are + reserved for future use must return zero when read.} +\label{misaletters} +\end{table*} + + +\begin{commentary} +The {\tt misa} register exposes a rudimentary catalog of CPU features +to machine-mode code. More extensive information can be obtained in +machine mode by probing other machine registers, and examining other +ROM storage in the system as part of the boot process. + +We require that lower privilege levels execute environment calls +instead of reading CPU registers to determine features available at +each privilege level. 
This enables virtualization layers to alter the +ISA observed at any level, and supports a much richer command +interface without burdening hardware designs. +\end{commentary} + + +\clearpage + +\subsection{Machine Vendor ID Register {\tt mvendorid}} + +The {\tt mvendorid} CSR is an XLEN-bit read-only register encoding the +manufacturer of the part. This register must be readable in any +implementation, but a value of 0 can be returned to indicate the field +is not implemented or that this is a non-commercial implementation. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{J} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{Vendor} \\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Vendor ID register ({\tt mvendorid}).} +\label{mvendorreg} +\end{figure*} + +\begin{commentary} +Non-zero vendor IDs will be allocated by the RISC-V Foundation to commercial +vendors of RISC-V chips. +\end{commentary} + +\subsection{Machine Architecture ID Register {\tt marchid}} + +The {\tt marchid} CSR is an XLEN-bit read-only register encoding the +base microarchitecture of the hart. This register must be readable in +any implementation, but a value of 0 can be returned to indicate the +field is not implemented. The combination of {\tt mvendorid} and {\tt + marchid} should uniquely identify the type of hart microarchitecture +that is implemented. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{J} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{Architecture ID} \\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine Architecture ID register ({\tt marchid}).} +\label{marchreg} +\end{figure*} + +Open-source project architecture IDs are allocated globally by the +RISC-V Foundation, and have non-zero architecture IDs with a zero +most-significant-bit (MSB). 
Commercial architecture IDs are allocated +by each commercial vendor independently, but must have the MSB set and +cannot contain zero in the remaining XLEN-1 bits. + +\begin{commentary} +The intent is for the architecture ID to represent the +microarchitecture associated with the repo around which development +occurs rather than a particular organization. Commercial fabrications +of open-source designs should (and might be required by the license +to) retain the original architecture ID. This will aid in reducing +fragmentation and tool support costs, as well as provide attribution. +Open-source architecture IDs should be administered by the Foundation +and should only be allocated to released, functioning open-source +projects. Commercial architecture IDs can be managed independently by +any registered vendor but are required to have IDs disjoint from the +open-source architecture IDs (MSB set) to prevent collisions if a +vendor wishes to use both closed-source and open-source +microarchitectures. + +The convention adopted within the following Implementation field can +be used to segregate branches of the same architecture design, +including by organization. The {\tt misa} register also helps +distinguish different variants of a design, as does the configuration +string if present. +\end{commentary} + +\subsection{Machine Implementation ID Register {\tt mimpid}} + +The {\tt mimpid} CSR provides a unique encoding of the version of the +processor implementation. This register must be readable in any +implementation, but a value of 0 can be returned to indicate that the +field is not implemented. The Implementation value should reflect the +design of the RISC-V processor itself and not any surrounding system. + +\begin{figure*}[h!] 
+{\footnotesize +\begin{center} +\begin{tabular}{J} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{Implementation} \\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine Implementation ID register ({\tt mimpid}).} +\label{mimpidreg} +\end{figure*} + +\begin{commentary} +The format of this field is left to the provider of the architecture +source code, but will often be printed by standard tools as a +hexadecimal string without any leading or trailing zeros, so the +Implementation value should be left-justified (i.e., filled in from +most-significant nibble down) with subfields aligned on nibble +boundaries to ease human readability. +\end{commentary} + +\subsection{Hart ID Register {\tt mhartid}} + +The {\tt mhartid} register is an XLEN-bit read-only register +containing the integer ID of the hardware thread running the code. +This register must be readable in any implementation. Hart IDs might +not necessarily be numbered contiguously in a multiprocessor system, +but at least one hart must have a hart ID of zero. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{J} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{Hart ID}\\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Hart ID register ({\tt mhartid}).} +\label{mhartidreg} +\end{figure*} + + +\begin{commentary} +In certain cases, we must ensure exactly one hart runs some code +(e.g., at reset), and so require one hart to have a known hart ID of +zero. + +For efficiency, system implementers should aim to reduce the magnitude +of the largest hart ID used in a system. +\end{commentary} + +\subsection{Machine Status Register ({\tt mstatus})} + +The {\tt mstatus} register is an XLEN-bit read/write register +formatted as shown in Figure~\ref{mstatusreg}. The {\tt mstatus} +register keeps track of and controls the hart's current operating +state. 
Restricted views of the {\tt mstatus} register appear as the +{\tt hstatus} and {\tt sstatus} registers in the H and S +privilege-level ISAs respectively. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\setlength{\tabcolsep}{4pt} +\begin{tabular}{cEccccccc} +\\ +\instbit{XLEN-1} & +\instbitrange{XLEN-2}{29} & +\instbitrange{28}{24} & +\instbitrange{23}{20} & +\instbit{19} & +\instbit{18} & +\instbit{17} & +\instbitrange{16}{15} & + \\ +\hline +\multicolumn{1}{|c|}{SD} & +\multicolumn{1}{c|}{\wpri} & +\multicolumn{1}{c|}{VM[4:0]\,(\warl)} & +\multicolumn{1}{c|}{\wpri} & +\multicolumn{1}{c|}{MXR} & +\multicolumn{1}{c|}{PUM} & +\multicolumn{1}{c|}{MPRV} & +\multicolumn{1}{c|}{XS[1:0]} & + \\ +\hline +1 & XLEN-30 & 5 & 4 & 1 & 1 & 1 & 2 & \\ +\end{tabular} +\begin{tabular}{ccccccccccccc} +\\ +& +\instbitrange{14}{13} & +\instbitrange{12}{11} & +\instbitrange{10}{9} & +\instbit{8} & +\instbit{7} & +\instbit{6} & +\instbit{5} & +\instbit{4} & +\instbit{3} & +\instbit{2} & +\instbit{1} & +\instbit{0} \\ +\hline + & +\multicolumn{1}{|c|}{FS[1:0]} & +\multicolumn{1}{c|}{MPP[1:0]} & +\multicolumn{1}{c|}{HPP[1:0]} & +\multicolumn{1}{c|}{SPP} & +\multicolumn{1}{c|}{MPIE} & +\multicolumn{1}{c|}{HPIE} & +\multicolumn{1}{c|}{SPIE} & +\multicolumn{1}{c|}{UPIE} & +\multicolumn{1}{c|}{MIE} & +\multicolumn{1}{c|}{HIE} & +\multicolumn{1}{c|}{SIE} & +\multicolumn{1}{c|}{UIE} \\ +\hline + & 2 & 2 & 2 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine-mode status register ({\tt mstatus}).} +\label{mstatusreg} +\end{figure*} + + +\subsection{Privilege and Global Interrupt-Enable Stack in {\tt mstatus} register} +\label{privstack} + +Interrupt-enable bits, MIE, HIE, SIE, and UIE, are provided for each privilege +mode. These bits are primarily used to guarantee atomicity with respect to +interrupt handlers at the current privilege level. 
When a hart is executing +in privilege mode {\em x}, interrupts are enabled when {\em x}\,IE=1. +Interrupts for lower privilege modes are always disabled, whereas interrupts +for higher privilege modes are always enabled. Higher-privilege-level code +can use separate per-interrupt enable bits to disable selected interrupts +before ceding control to a lower privilege level. + +\begin{commentary} +The {\em x}IE bits are located in the low-order bits of {\tt mstatus}, +allowing them to be atomically set or cleared with a single CSR instruction. +\end{commentary} + +To support nested traps, each privilege mode {\em x} has a two-level +stack of interrupt-enable bits and privilege modes. {\em x}\,PIE +holds the value of the interrupt-enable bit active prior to the trap, +and {\em x}\,PP holds the previous privilege mode. The {\em x}\,PP +fields can only hold privilege modes up to {\em x}, so MPP and HPP are +two bits wide, SPP is one bit wide, and UPP is implicitly zero. When +a trap is taken from privilege mode {\em y} into privilege mode {\em + x}, {\em x}\,PIE is set to the value of {\em y}\,IE; {\em x}\,IE is set to +0; and {\em x}\,PP is set to {\em y}. + +\begin{commentary} +For lower privilege modes, any trap (synchronous or asynchronous) is +usually taken at a higher privilege mode with interrupts disabled. +The higher-level trap handler will either service the trap and return +using the stacked information, or, if not returning immediately to the +interrupted context, will save the privilege stack before re-enabling +interrupts, so only one entry per stack is required. +\end{commentary} + +The MRET, HRET, SRET, or URET instructions are used to return from +traps in M-mode, H-mode, S-mode, or U-mode respectively. 
When +executing an {\em x}RET instruction, supposing {\em x}\,PP holds the +value {\em y}, {\em y}\,IE is set to {\em x}\,PIE; the privilege mode +is changed to {\em y}; {\em x}\,PIE is set to 1; and {\em x}\,PP is +set to U (or M if user-mode is not supported). + +\begin{commentary} +When the stack is popped, the lowest-supported privilege mode with +interrupts enabled is added to the bottom of stack to help catch +errors that cause invalid entries to be popped off the stack. +\end{commentary} + +{\em x}\,PP fields are \wlrl\ fields that need only be able to store +supported privilege modes. + +\begin{commentary} +If the machine provides only U and M modes, then only a single +hardware storage bit is required to represent either 00 or 11 in MPP. +If the machine provides only M mode, then MPP is hardwired to 11. +\end{commentary} + +User-level interrupts are an optional extension and have been +allocated the ISA extension letter N. +If user-level interrupts are omitted, the +UIE and UPIE bits are hardwired to zero. For all other supported +privilege modes {\em x}, the {\em x}\,IE, {\em x}\,PIE, and {\em + x}\,PP fields are required to be implemented. + +\begin{commentary} +User-level interrupts are primarily intended to support secure +embedded systems with only M-mode and U-mode present. +\end{commentary} + +\subsection{Virtualization Management Field in {\tt mstatus} Register} +\label{sec:vm} + +The virtualization management field VM[4:0] indicates the currently +active scheme for virtualization, including virtual memory translation +and protection. Table~\ref{sbidmm} shows the currently defined +virtualization schemes. Only the Mbare mode is mandatory for a RISC-V +hardware implementation. The Mbare, Mbb, and Mbbid schemes are +described in Sections~\ref{mbare}--\ref{bb}, while the page-based +virtual memory schemes are described in later chapters. 
+ +Each setting of the VM field defines operation at all supported +privilege levels, and the behavior of some VM settings might differ +depending on the privilege levels supported in hardware. + +\begin{table*}[h!] +\begin{center} +\begin{tabular}{|r|l|l|l|} +\hline +Value & Abbreviation & Modes Required & Description \\ +\hline +0 & Mbare & M & No translation or protection. \\ +1 & Mbb & M, U & Single base-and-bound. \\ +2 & Mbbid & M, U & Separate instruction and data base-and-bound. \\ +\hline +3--7 & \multicolumn{3}{c|}{\em Reserved} \\ +\hline +8 & Sv32 & M, S, U & Page-based 32-bit virtual addressing. \\ +9 & Sv39 & M, S, U & Page-based 39-bit virtual addressing. \\ +10 & Sv48 & M, S, U & Page-based 48-bit virtual addressing. \\ +11 & Sv57 & M, S, U & Reserved for page-based 57-bit virtual addressing. \\ +12 & Sv64 & M, S, U & Reserved for page-based 64-bit virtual addressing. \\ +\hline +13--31 & \multicolumn{3}{c|}{\em Reserved} \\ +\hline +\end{tabular} +\end{center} +\caption{Encoding of virtualization management field VM[4:0].} +\label{sbidmm} +\end{table*} + +Mbare corresponds to no memory management or translation, and so all +effective addresses regardless of privilege mode are treated as +machine physical addresses. Mbare is the mode entered at reset. + +Mbb is a base-and-bounds architecture for systems with at least two +privilege levels (U and M). Mbb is suited for systems that require +low-overhead translation and protection for user-mode code, and that +do not require demand-paged virtual memory (swapping is supported). A +variant Mbbid provides separate instruction and data segments to allow an +execute-only code segment to be shared between processes. + +Sv32 is a page-based virtual-memory architecture for RV32 systems +providing a 32-bit virtual address space designed to support modern +supervisor-level operating systems, including Unix-based systems. 
+ +Sv39 and Sv48 are page-based virtual-memory architectures for RV64 +systems providing a 39-bit or 48-bit virtual address space +respectively to support modern supervisor-level operating systems, +including Unix-based systems. + +Sv32, Sv39, and Sv48 require implementations to support M, S, and U +privilege levels. If H-mode is also present, additional operations +are defined for hypervisor-level code to support multiple +supervisor-level virtual machines. Hypervisor-mode support for +virtual machines has not yet been defined. + +\begin{commentary} +The existing Sv39 and Sv48 schemes can be readily extended to Sv57 and +Sv64 virtual address widths. Sv52, Sv60, Sv68, and Sv76 virtual +address space widths are tentatively planned for RV128 systems, +where virtual address widths under 68 bits are intended for +applications requiring 128-bit integer arithmetic but not larger +address spaces. + +Wider virtual address widths incur microarchitectural costs for wider +internal registers as well as longer page-table searches on +address-translation cache misses, so we support a range of virtual +address widths where each wider width adds one more level to the +in-memory page table. A single hardware page-table walker design can +easily support multiple virtual address widths, but requires internal +hardware registers to support the widest width. +\end{commentary} + +\begin{commentary} +Our current definition of the virtualization management schemes only +supports the same base architecture at every privilege level. +Variants of the virtualization schemes can be defined to support +narrow widths at lower-privilege levels, e.g., to run RV32 code on an +RV64 system. +\end{commentary} + +VM is a \warl\ field, so whether a VM setting is supported by an +implementation can be determined by writing the value to VM, then +reading the value back from VM to see if the same value was returned. 
+ +\subsection{Memory Privilege in {\tt mstatus} Register} + +The MPRV bit modifies the privilege level at which loads and stores +execute. When MPRV=0, translation and protection behave as normal. When +MPRV=1, data memory addresses are translated and protected as though the +current privilege mode were set to MPP. Instruction address-translation and +protection are unaffected. + +The MXR (Make eXecutable Readable) bit modifies the privilege with +which loads access virtual memory. When MXR=0, only loads from pages +marked readable (R=1 in Figure~\ref{sv32pte}) will succeed. When +MXR=1, loads from pages marked either readable or executable (R=1 or +X=1) will succeed. + +\begin{commentary} +The MPRV and MXR mechanisms were conceived to improve the efficiency of M-mode +routines that emulate missing hardware features, e.g., misaligned loads and +stores. MPRV obviates the need to perform address translation in software. +MXR allows instruction words to be loaded from pages marked execute-only. + +For simplicity, MPRV and MXR are in effect regardless of privilege +mode, but in normal use will only be enabled for short sequences in +machine mode. +\end{commentary} + +The PUM (Protect User Memory) bit modifies the privilege with which S-mode +loads, stores, and instruction fetches access virtual memory. When PUM=0, +translation and protection behave as normal. When PUM=1, S-mode memory +accesses to pages that are accessible by U-mode (U=1 in Figure~\ref{sv32pte}) +will fault. PUM has no effect when page-based virtual memory is not in +effect. Note that, while PUM is ordinarily ignored when not executing in +S-mode, it {\em is} in effect when MPRV=1 and MPP=S. + +\subsection{Extension Context Status in {\tt mstatus} Register} + +Supporting substantial extensions is one of the primary goals of +RISC-V, and hence we define a standard interface to allow unchanged +privileged-mode code, particularly a supervisor-level OS, to support +arbitrary user-mode state extensions. 
+ +\begin{commentary} + To date, there are no standard extensions that define additional + state beyond the floating-point CSR and data registers. +\end{commentary} + +The FS[1:0] read/write field and the XS[1:0] read-only field are used +to reduce the cost of context save and restore by setting and tracking +the current state of the floating-point unit and any other user-mode +extensions respectively. The FS field encodes the status of the +floating-point unit, including the CSR {\tt fcsr} and floating-point +data registers {\tt f0}--{\tt f31}, while the XS field encodes the +status of any additional user-mode extensions and associated state. +These fields can be checked by a context switch routine to quickly +determine whether a state save or restore is required. If a save or +restore is required, additional instructions and CSRs are typically +required to effect and optimize the process. + +\begin{commentary} + The design anticipates that most context switches will not need to + save/restore state in either or both of the floating-point unit or + other extensions, so provides a fast check via the SD bit. +\end{commentary} + +The FS and XS fields use the same status encoding as shown in +Table~\ref{fsxsencoding}, with the four possible status values being +Off, Initial, Clean, and Dirty. + +\begin{table*}[h!] +\begin{center} +\begin{tabular}{|r|l|l|} +\hline +Status & FS Meaning & XS Meaning\\ +\hline +0 & Off & All off \\ +1 & Initial & None dirty or clean, some on\\ +2 & Clean & None dirty, some clean \\ +3 & Dirty & Some dirty \\ +\hline +\end{tabular} +\end{center} +\caption{Encoding of FS[1:0] and XS[1:0] status fields.} +\label{fsxsencoding} +\end{table*} + +In systems that do not implement S-mode and do not have a +floating-point unit, the FS field is hardwired to zero. + +In systems without additional user extensions requiring new state, the +XS field is hardwired to zero. 
Every additional extension with state +has a local status register encoding the equivalent of the XS states. +If there is only a single additional extension, its status can be +directly mirrored in the XS field. If there is more than one +additional extension, the XS field represents a summary of all +extensions' status as shown in Table~\ref{fsxsencoding}. + +\begin{commentary} +The XS field effectively reports the maximum status value across all +user-extension status fields, though individual extensions can use a +different encoding than XS. +\end{commentary} + +The SD bit is a read-only bit that summarizes whether either the FS +field or XS field signals the presence of some dirty state that will +require saving extended user context to memory. If both XS and FS are +hardwired to zero, then SD is also always zero. + +When an extension's status is set to Off, any instruction that +attempts to read or write the corresponding state will cause an +exception. When the status is Initial, the corresponding state should +have an initial constant value. When the status is Clean, the +corresponding state is potentially different from the initial value, +but matches the last value stored on a context swap. When the status +is Dirty, the corresponding state has potentially been modified since +the last context save. + +During a context save, the responsible privileged code need only write +out the corresponding state if its status is Dirty, and can then reset +the extension's status to Clean. During a context restore, the +context need only be loaded from memory if the status is Clean (it +should never be Dirty at restore). If the status is Initial, the +context must be set to an initial constant value on context restore to +avoid a security hole, but this can be done without accessing memory. +For example, the floating-point registers can all be initialized to +the immediate value 0. + +The FS and XS fields are read by the privileged code before saving the +context. 
The FS field is set directly by privileged code when +resuming a user context, while the XS field is set indirectly by +writing to the status register of the individual extensions. The +status fields will also be updated during execution of instructions, +regardless of privilege mode. + +Extensions to the user-mode ISA often include additional user-mode +state, and this state can be considerably larger than the base integer +registers. The extensions might only be used for some applications, +or might only be needed for short phases within a single application. +To improve performance, the user-mode extension can define additional +instructions to allow user-mode software to return the unit to an +initial state or even to turn off the unit. + +For example, a coprocessor might need to be configured before use +and can be ``unconfigured'' after use. The unconfigured state would +be represented as the Initial state for context save. If the same +application remains running between the unconfigure and the next +configure (which would set status to Dirty), there is no need to +actually reinitialize the state at the unconfigure instruction, as all +state is local to the user process, i.e., the Initial state may only +cause the coprocessor state to be initialized to a constant value at +context restore, not at every unconfigure. + +Executing a user-mode instruction to disable a unit and place it into +the Off state will cause an illegal instruction exception to be raised +if any subsequent instruction tries to use the unit before it is +turned back on. A user-mode instruction to turn a unit on must also +ensure the unit's state is properly initialized, as the unit might +have been used by another context in the meantime. + +Table~\ref{fsxsstates} shows all the possible state transitions for +the FS or XS status bits. Note that the standard floating-point +extensions do not support user-mode unconfigure or disable/enable +instructions. + +\begin{table*}[h!] 
+\begin{center} +\begin{tabular}{|l|l|l|l|l|} +\hline +\multicolumn{1}{|r|}{Current State} & Off & Initial & Clean & Dirty \\ +Action & & & &\\ +\hline +\hline +\multicolumn{5}{|c|}{At context save in privileged code}\\ +\hline +Save state? & No & No & No & Yes \\ +Next state & Off & Initial & Clean & Clean \\ +\hline +\hline +\multicolumn{5}{|c|}{At context restore in privileged code}\\ +\hline +Restore state? & No & Yes, to initial & Yes, from memory & N/A \\ +Next state & Off & Initial & Clean & N/A \\ +\hline +\hline +\multicolumn{5}{|c|}{Execute instruction to read state}\\ +\hline +Action? & Exception & Execute & Execute & Execute \\ +Next state & Off & Initial & Clean & Dirty \\ +\hline +\hline +\multicolumn{5}{|c|}{Execute instruction to modify state, including configuration}\\ +\hline +Action? & Exception & Execute & Execute & Execute \\ +Next state & Off & Dirty & Dirty & Dirty \\ +\hline +\hline +\multicolumn{5}{|c|}{Execute instruction to unconfigure unit}\\ +\hline +Action? & Exception & Execute & Execute & Execute \\ +Next state & Off & Initial & Initial & Initial \\ +\hline +\hline +\multicolumn{5}{|c|}{Execute instruction to disable unit}\\ +\hline +Action? & Execute & Execute & Execute & Execute \\ +Next state & Off & Off & Off & Off \\ +\hline +\hline +\multicolumn{5}{|c|}{Execute instruction to enable unit}\\ +\hline +Action? & Execute & Execute & Execute & Execute \\ +Next state & Initial & Initial & Initial & Initial \\ +\hline +\end{tabular} +\end{center} +\caption{FS and XS state transitions.} +\label{fsxsstates} +\end{table*} + +Standard privileged instructions to initialize, save, and restore +extension state are provided to insulate privileged code from details +of the added extension state by treating the state as an opaque +object. + +\begin{commentary} +Many coprocessor extensions are only used in limited contexts that +allow software to safely unconfigure or even disable units when done. 
+This reduces the context-switch overhead of large stateful +coprocessors. + +We separate out floating-point state from other extension state, as +when a floating-point unit is present the floating-point registers are +part of the standard calling convention, and so user-mode software +cannot know when it is safe to disable the floating-point unit. +\end{commentary} + +The XS field provides a summary of all added extension state, but +additional microarchitectural bits might be maintained in the +extension to further reduce context save and restore overhead. + +The SD bit is read-only and is set when either the FS or XS bits +encode a Dirty state (i.e., SD=((FS==11) OR (XS==11))). This allows +privileged code to quickly determine when no additional context save is +required beyond the integer register set and PC. + +The floating-point unit state is always initialized, saved, and +restored using standard instructions (F, D, and/or Q), and privileged +code must be aware of FLEN to determine the appropriate space to +reserve for each {\tt f} register. + +In a supervisor-level OS, any additional user-mode state should be +initialized, saved, and restored using SBI calls that treat the +additional context as an opaque object of a fixed maximum size. The +implementation of the SBI initialize, save, and restore calls might +require additional implementation-dependent privileged instructions to +initialize, save, and restore microarchitectural state inside a +coprocessor. + +All privileged modes share a single copy of the FS and XS bits. In a +system with more than one privileged mode, supervisor mode would +normally use the FS and XS bits directly to record the status with +respect to the supervisor-level saved context. 
Other more-privileged +active modes must be more conservative in saving and restoring the +extension state in their corresponding version of the context, but can +rely on the Off state to avoid save and restore, and the Initial state +to avoid saving the state. + +\begin{commentary} +In any reasonable use case, the number of context switches between +user and supervisor level should far outweigh the number of context +switches to other privilege levels. Note that coprocessors should not +require their context to be saved and restored to service asynchronous +interrupts, unless the interrupt results in a user-level context swap. +\end{commentary} + +\subsection{Machine Trap-Vector Base-Address Register ({\tt mtvec})} + +The {\tt mtvec} register is an XLEN-bit read/write register that holds +the base address of the M-mode trap vector. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{J@{}F} +\instbitrange{XLEN-1}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{Trap-Vector Base Address (\warl)} & +\multicolumn{1}{c|}{0} \\ +\hline +XLEN-2 & 2 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine trap-vector base-address register ({\tt mtvec}).} +\label{mtvecreg} +\end{figure*} + +The {\tt mtvec} register must always be implemented, but can contain +a hardwired read-only value. If {\tt mtvec} is writable, the set of values +the register may hold can vary by implementation. The value in the {\tt +mtvec} register must always be aligned on a 4-byte boundary (the low two bits +are always zero). The value returned by reading a variable {\tt mtvec} +register should always match the value used to generate the handler PC address +when handling traps. + +\begin{commentary} +We allow for considerable flexibility in implementation of the trap +vector base address. 
On the one hand, we do not wish to burden low-end +implementations with a large number of state bits, but on the other +hand, we wish to allow flexibility for larger systems. +\end{commentary} + +By default, all traps into machine mode cause the {\tt pc} to be set to the +value in {\tt mtvec}. Additional trap vector entry points can be defined by +implementations to allow more rapid identification and service of certain trap +causes. + +The location of the reset vector and non-maskable interrupt vector are +implementation-defined. + +\begin{commentary} +Reset, NMI vectors, and other interrupt vector default locations are +given in a platform specification. +\end{commentary} + +\subsection{Machine Trap Delegation Registers ({\tt medeleg} and {\tt mideleg})} + +By default, all traps at any privilege level are handled in machine +mode, though a machine-mode handler can redirect traps back to the +appropriate level with the MRET instruction (Section~\ref{otherpriv}). +To increase performance, implementations can provide individual +read/write bits within {\tt medeleg} and {\tt mideleg} to indicate +that certain exceptions and interrupts should be processed directly by +a lower privilege level. The machine exception delegation register +({\tt medeleg}) and machine interrupt delegation register ({\tt + mideleg}) are XLEN-bit read/write registers. + +In systems with all four privilege modes (M/H/S/U), a set bit in {\tt + medeleg} or {\tt mideleg} will delegate any corresponding trap in +U-mode, S-mode, or H-mode to the H-mode trap handler. H-mode may in +turn set corresponding bits in the {\tt hedeleg} and {\tt hideleg} +registers to delegate traps that occur in S-mode or U-mode to the +S-mode trap handler. If U-mode traps are supported, S-mode may in +turn set corresponding bits in the {\tt sedeleg} and {\tt sideleg} +registers to delegate traps that occur in U-mode to the U-mode trap +handler. 
+ +In systems with three privilege modes (M/S/U), setting a bit in {\tt + medeleg} or {\tt mideleg} will delegate the corresponding trap in +S-mode or U-mode to the S-mode trap handler. If U-mode traps are +supported, S-mode may in turn set corresponding bits in the {\tt + sedeleg} and {\tt sideleg} registers to delegate traps that occur in +U-mode to the U-mode trap handler. + +In systems with two privilege modes (M/U) and support for U-mode +traps, setting a bit in {\tt medeleg} or {\tt mideleg} will +delegate the corresponding trap in U-mode to the U-mode trap handler. + +In systems with only M-mode, or with both M-mode and U-mode but +without U-mode trap support, the {\tt medeleg} and {\tt mideleg} +registers should be hardwired to zero. + +When a trap is delegated to a less-privileged mode {\em x}, the +{\em x}\,{\tt cause} register is written with the trap cause; the +{\em x}\,{\tt epc} register is written with the virtual address of +the instruction that took the trap; the {\em x}\,PP field +of {\tt mstatus} is written with the active privilege mode at the time of +the trap; the {\em x}\,PIE field of {\tt mstatus} is written with the +value of the active interrupt-enable bit at the time of the trap; and +the {\em x}\,IE field of {\tt mstatus} is cleared. The {\tt mcause} and +{\tt mepc} registers and the MPP and MPIE fields of {\tt mstatus} are +not written. + +An implementation shall not hardwire any delegation bits to one, i.e., +any trap that can be delegated must support not being delegated. An +implementation can choose to subset the delegatable traps, with the +supported delegatable bits found by writing one to every bit location, +then reading back the value in {\tt medeleg} or {\tt mideleg} to see +which bit positions hold a one. + +\begin{figure}[h!] 
+{\footnotesize +\begin{center} +\begin{tabular}{@{}U} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{Synchronous Exceptions (\warl)} \\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine Exception Delegation Register {\tt medeleg}.} +\label{medelegreg} +\end{figure} + +{\tt medeleg} has a bit position allocated for every synchronous exception +shown in Table~\ref{mcauses}, with the index of the bit position equal to the +value returned in the {\tt mcause} register (e.g., setting bit 8 allows +user-mode environment calls to be delegated to a lower-privilege trap +handler). + +\begin{figure}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}U} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{Interrupts (\warl)} \\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine Interrupt Delegation Register {\tt mideleg}.} +\label{midelegreg} +\end{figure} + +{\tt mideleg} holds trap delegation bits for individual interrupts, with the +layout of bits matching those in the {\tt mip} register (e.g., STIP interrupt +delegation control is located in bit 5). + +\subsection{Machine Interrupt Registers ({\tt mip} and {\tt mie})} + +The {\tt mip} register is an XLEN-bit read/write register containing +information on pending interrupts, while {\tt mie} is the +corresponding XLEN-bit read/write register containing interrupt enable +bits. Only the bits corresponding to lower-privilege software +interrupts (USIP, SSIP, HSIP) and timer interrupts (UTIP, STIP and +HTIP) in {\tt mip} are writable through this CSR address; the +remaining bits are read-only. + +Restricted views of the {\tt mip} and {\tt mie} registers appear as the {\tt +hip}/{\tt hie}, {\tt sip}/{\tt sie}, and {\tt uip}/{\tt uie} registers in +H-mode, S-mode, and U-mode respectively. 
If an interrupt is delegated to +privilege mode {\em x} by setting a bit in the {\tt mideleg} register, it +becomes visible in the {\em x}\,{\tt ip} register and is maskable using the {\em +x}\,{\tt ie} register. Otherwise, the corresponding bits in {\em x}\,{\tt ip} +and {\em x}\,{\tt ie} appear to be hardwired to zero. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\setlength{\tabcolsep}{4pt} +\begin{tabular}{Scccccccccccc} +\instbitrange{XLEN-1}{12} & +\instbit{11} & +\instbit{10} & +\instbit{9} & +\instbit{8} & +\instbit{7} & +\instbit{6} & +\instbit{5} & +\instbit{4} & +\instbit{3} & +\instbit{2} & +\instbit{1} & +\instbit{0} \\ +\hline +\multicolumn{1}{|c|}{\wiri} & +\multicolumn{1}{c|}{MEIP} & +\multicolumn{1}{c|}{HEIP} & +\multicolumn{1}{c|}{SEIP} & +\multicolumn{1}{c|}{UEIP} & +\multicolumn{1}{c|}{MTIP} & +\multicolumn{1}{c|}{HTIP} & +\multicolumn{1}{c|}{STIP} & +\multicolumn{1}{c|}{UTIP} & +\multicolumn{1}{c|}{MSIP} & +\multicolumn{1}{c|}{HSIP} & +\multicolumn{1}{c|}{SSIP} & +\multicolumn{1}{c|}{USIP} \\ +\hline +XLEN-12 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine interrupt-pending register ({\tt mip}).} +\label{mipreg} +\end{figure*} + +\begin{figure*}[h!] 
+{\footnotesize +\begin{center} +\setlength{\tabcolsep}{4pt} +\begin{tabular}{Scccccccccccc} +\instbitrange{XLEN-1}{12} & +\instbit{11} & +\instbit{10} & +\instbit{9} & +\instbit{8} & +\instbit{7} & +\instbit{6} & +\instbit{5} & +\instbit{4} & +\instbit{3} & +\instbit{2} & +\instbit{1} & +\instbit{0} \\ +\hline +\multicolumn{1}{|c|}{\wpri} & +\multicolumn{1}{c|}{MEIE} & +\multicolumn{1}{c|}{HEIE} & +\multicolumn{1}{c|}{SEIE} & +\multicolumn{1}{c|}{UEIE} & +\multicolumn{1}{c|}{MTIE} & +\multicolumn{1}{c|}{HTIE} & +\multicolumn{1}{c|}{STIE} & +\multicolumn{1}{c|}{UTIE} & +\multicolumn{1}{c|}{MSIE} & +\multicolumn{1}{c|}{HSIE} & +\multicolumn{1}{c|}{SSIE} & +\multicolumn{1}{c|}{USIE} \\ +\hline +XLEN-12 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine interrupt-enable register ({\tt mie}).} +\label{miereg} +\end{figure*} + +The MTIP, HTIP, STIP, UTIP bits correspond to timer interrupt-pending +bits for machine, hypervisor, supervisor, and user timer interrupts, +respectively. The MTIP bit is read-only and is cleared by writing to +the memory-mapped machine-mode timer compare register. The UTIP, STIP +and HTIP bits may be written by M-mode software to deliver timer +interrupts to lower privilege levels. User, supervisor and hypervisor +software may clear the UTIP, STIP and HTIP bits with calls to the AEE, +SEE, or HEE, respectively. + +There is a separate timer interrupt-enable bit, named MTIE, HTIE, +STIE, and UTIE for M-mode, H-mode, S-mode, and U-mode timer interrupts +respectively. + +Each lower privilege level has a separate software interrupt-pending +bit (HSIP, SSIP, USIP), which can be both read and written by CSR +accesses from code running on the local hart at the associated or any +higher privilege level. The machine-level MSIP bits are written by +accesses to memory-mapped control registers, which are used by remote +harts to provide machine-mode interprocessor interrupts. 
+Interprocessor interrupts for lower privilege levels are implemented +through ABI, SBI or HBI calls to the AEE, SEE or HEE respectively, +which might ultimately result in a machine-mode write to the receiving +hart's MSIP bit. A hart can write its own MSIP bit using the same +memory-mapped control register. + +\begin{commentary} +We only allow a hart to directly write its own HSIP, SSIP, or USIP +bits when running in appropriate mode, as other harts might be +virtualized and possibly descheduled by higher privilege levels. We +rely on ABI, SBI, and HBI calls to provide interprocessor interrupts +for this reason. Machine-mode harts are not virtualized and can +directly interrupt other harts by setting their MSIP bits, typically +using uncached I/O writes to memory-mapped control registers depending +on the platform specification. +\end{commentary} + +The MEIP, HEIP, SEIP, UEIP bits correspond to external +interrupt-pending bits for machine, hypervisor, supervisor, and user +external interrupts, respectively. These bits are read-only and are +set and cleared by a platform-specific interrupt controller, such as +the standard platform-level interrupt controller specified in +Chapter~\ref{plic}. There is a separate external interrupt-enable +bit, named MEIE, HEIE, SEIE, and UEIE for M-mode, H-mode, S-mode, and +U-mode external interrupts respectively. + +\begin{commentary} +The non-maskable interrupt is not made visible via the {\tt mip} +register as its presence is implicitly known when executing the NMI +trap handler. +\end{commentary} + +For all the various interrupt types (software, timer, and external), +if a privilege level is not supported, the associated pending and +interrupt-enable bits are hardwired to zero in the {\tt mip} and {\tt + mie} registers respectively. Hence, these are all effectively +\warl\ fields. 
+ +\begin{commentary} +Implementations can add additional platform-specific machine-level +interrupt sources to the high bits of these registers, though the +expectation is that most external interrupts will be routed through +the platform interrupt controller and be delivered via MEIP. +\end{commentary} + +An interrupt {\em i} will be taken if bit {\em i} is set in both {\tt + mip} and {\tt mie}, and if interrupts are globally enabled. By +default, M-mode interrupts are globally enabled if the hart's current +privilege mode is less than M, or if the current privilege mode is M +and the MIE bit in the {\tt mstatus} register is set. If bit {\em i} +in {\tt mideleg} is set, however, interrupts are considered to be +globally enabled if the hart's current privilege mode equals the +delegated privilege mode (H, S, or U) and that mode's interrupt enable +bit (HIE, SIE or UIE in {\tt mstatus}) is set, or if the current +privilege mode is less than the delegated privilege mode. + +Multiple simultaneous interrupts and traps at the same privilege level +are handled in the following decreasing priority order: external +interrupts, software interrupts, timer interrupts, then finally any +synchronous traps. + +\subsection{Machine Timer Registers ({\tt mtime} and {\tt mtimecmp})} + +Platforms provide a real-time counter, exposed as a memory-mapped +machine-mode register, {\tt mtime}. {\tt mtime} must run at constant +frequency, and the platform must provide a mechanism for determining +the timebase of {\tt mtime}. + +The {\tt mtime} register has a 64-bit precision on all RV32, RV64, and +RV128 systems. Platforms provide a 64-bit memory-mapped machine-mode +timer compare register ({\tt mtimecmp}), which causes a timer +interrupt to be posted when the {\tt mtime} register contains a value +greater than or equal to the value in the {\tt mtimecmp} register. +The interrupt remains posted until it is cleared by writing the {\tt + mtimecmp} register. 
The interrupt will only be taken if interrupts +are enabled and the MTIE bit is set in the {\tt mie} register. + +\begin{figure}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}J} +\instbitrange{63}{0} \\ +\hline +\multicolumn{1}{|c|}{\tt mtime} \\ +\hline +64 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine time register (memory-mapped control register).} +\end{figure} + +\begin{figure}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}J} +\instbitrange{63}{0} \\ +\hline +\multicolumn{1}{|c|}{\tt mtimecmp} \\ +\hline +64 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine time compare register (memory-mapped control register).} +\end{figure} + +\begin{commentary} +The timer facility is defined to use wall-clock time rather than a +cycle counter to support modern processors that run with a highly +variable clock frequency to save energy through dynamic voltage and +frequency scaling. + +Accurate real-time clocks (RTCs) are relatively expensive to provide +(requiring a crystal or MEMS oscillator) and have to run even when the +rest of system is powered down, and so there is usually only one in a +system located in a different frequency/voltage domain from the +processors. Hence, the RTC must be shared by all the harts in a +system and accesses to the RTC will potentially incur the penalty of a +voltage-level-shifter and clock-domain crossing. It is thus more +natural to expose {\tt mtime} as a memory-mapped register than as a CSR. + +Lower privilege levels do not have their own {\tt timecmp} registers. +Instead, machine-mode software can implement any number of virtual timers on +a hart by multiplexing the next timer interrupt into the {\tt mtimecmp} +register. + +Simple fixed-frequency systems can use a single clock for both cycle +counting and wall-clock time. +\end{commentary} + +In RV32, memory-mapped writes to {\tt mtimecmp} modify only one 32-bit +part of the register. 
The following code sequence sets a 64-bit {\tt + mtimecmp} value without spuriously generating a timer interrupt due +to the intermediate value of the comparand: + +\begin{figure}[h!] +\begin{center} +\begin{verbatim} + # New comparand is in a1:a0. + li t0, -1 + sw t0, mtimecmp # No smaller than old value. + sw a1, mtimecmp+4 # No smaller than new value. + sw a0, mtimecmp # New value. +\end{verbatim} +\end{center} +\caption{Sample code for setting the 64-bit time comparand in RV32 + assuming the registers live in a strongly ordered I/O region.} +\label{mtimecmph} +\end{figure} + +\subsection{Hardware Performance Monitor} + +M-mode includes a basic hardware performance monitoring facility. The {\tt +mcycle} CSR holds a count of the number of cycles the hart has executed since +some arbitrary time in the past. The {\tt minstret} CSR holds a count of the +number of instructions the hart has retired since some arbitrary time in the +past. The {\tt mcycle} and {\tt minstret} registers have 64-bit precision on +all RV32, RV64, and RV128 systems. + +The hardware performance monitor includes 29 additional event counters, {\tt +mhpmcounter3}--{\tt mhpmcounter31}. The event selector CSRs, {\tt +mhpmevent3}--{\tt mhpmevent31}, are \warl\ registers that control which event +causes the corresponding counter to increment. The meaning of these events is +defined by the platform, but event 0 is reserved to mean ``no event.'' +All counters should be implemented, but a legal implementation is to hard-wire +both the counter and its corresponding event selector to 0. + +\begin{figure}[h!] 
+{\footnotesize +\begin{center} +\begin{tabular}{@{}K@{}W@{}K} +\instbitrange{63}{0} \\ \cline{1-1} +\multicolumn{1}{|c|}{\tt mcycle} \\ \cline{1-1} +\multicolumn{1}{|c|}{\tt minstret} \\ \cline{1-1} + & & \instbitrange{XLEN-1}{0} \\ \cline{1-1}\cline{3-3} +\multicolumn{1}{|c|}{\tt mhpmcounter3} & & \multicolumn{1}{|c|}{\tt mhpmevent3} \\ \cline{1-1}\cline{3-3} +\multicolumn{1}{|c|}{\tt mhpmcounter4} & & \multicolumn{1}{|c|}{\tt mhpmevent4} \\ \cline{1-1}\cline{3-3} +\multicolumn{1}{c}{\vdots} & & \multicolumn{1}{c}{\vdots} \\ \cline{1-1}\cline{3-3} +\multicolumn{1}{|c|}{\tt mhpmcounter30} & & \multicolumn{1}{|c|}{\tt mhpmevent30} \\ \cline{1-1}\cline{3-3} +\multicolumn{1}{|c|}{\tt mhpmcounter31} & & \multicolumn{1}{|c|}{\tt mhpmevent31} \\ \cline{1-1}\cline{3-3} +64 & & XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Hardware performance monitor counters.} +\end{figure} + +All of these counters have 64-bit precision on RV32, RV64, and RV128. + +On RV32 only, reads of the {\tt mcycle}, {\tt minstret}, and {\tt +mhpmcounter{\em n}} CSRs return the low 32 bits, while reads of the {\tt +mcycleh}, {\tt minstreth}, and {\tt mhpmcounter{\em n}h} CSRs return bits +63--32 of the corresponding counter. + +\begin{figure}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}K} +\instbitrange{31}{0} \\ \hline +\multicolumn{1}{|c|}{\tt mcycleh} \\ \hline +\multicolumn{1}{|c|}{\tt minstreth} \\ \hline +\multicolumn{1}{|c|}{\tt mhpmcounter3h} \\ \hline +\multicolumn{1}{|c|}{\tt mhpmcounter4h} \\ \hline +\multicolumn{1}{c}{\vdots} \\ \hline +\multicolumn{1}{|c|}{\tt mhpmcounter30h} \\ \hline +\multicolumn{1}{|c|}{\tt mhpmcounter31h} \\ \hline +32 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Upper 32 bits of hardware performance monitor counters, RV32 only.} +\end{figure} + +On RV128 systems, the 64-bit values in {\tt mcycle}, {\tt minstret}, and +{\tt mhpmcounter{\em n}} are sign-extended to 128-bits when read. 
+\begin{commentary} +On RV128 systems, both signed and unsigned 64-bit values are held in a +canonical form with bit 63 repeated in all higher bit positions. The +counters are 64-bit values even in RV128, and so the counter CSR reads +preserve the sign-extension invariant. Implementations may choose to +implement fewer bits of the counters, provided software would be unlikely +to experience wraparound (e.g., $2^{63}$ instructions executed) +and thereby avoid having to actually implement the sign-extension +circuitry. +\end{commentary} + +\subsection{Machine Counter-Enable Registers ({\tt m[h|s|u]counteren})} + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\setlength{\tabcolsep}{4pt} +\begin{tabular}{cccMcccccc} +\instbit{31} & +\instbit{30} & +\instbit{29} & +\instbitrange{28}{6} & +\instbit{5} & +\instbit{4} & +\instbit{3} & +\instbit{2} & +\instbit{1} & +\instbit{0} \\ +\hline +\multicolumn{1}{|c|}{HPM31} & +\multicolumn{1}{c|}{HPM30} & +\multicolumn{1}{c|}{HPM29} & +\multicolumn{1}{c|}{...} & +\multicolumn{1}{c|}{HPM5} & +\multicolumn{1}{c|}{HPM4} & +\multicolumn{1}{c|}{HPM3} & +\multicolumn{1}{c|}{IR} & +\multicolumn{1}{c|}{TM} & +\multicolumn{1}{c|}{CY} \\ +\hline +1 & 1 & 1 & 23 & 1 & 1 & 1 & 1 & 1 & 1 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine counter-enable registers ({\tt mhcounteren}, {\tt mscounteren}, {\tt mucounteren}).} +\label{mhcounteren} +\end{figure*} + +The machine counter-enable registers, {\tt mhcounteren}, {\tt mscounteren}, +and {\tt mucounteren}, control the availability of the hardware performance +monitoring counters to hypervisor, supervisor, and user modes, respectively. + +When the CY, TM, IR, or HPM{\em n} bit in the {\tt mhcounteren} register is +clear, attempts to read the {\tt cycle}, {\tt time}, {\tt instret}, or +{\tt hpmcounter{\em n}} register while executing in H-mode +will cause an illegal instruction exception. 
+When one of these bits is set, access to the corresponding register is +permitted in H-mode. +The same bit positions in the {\tt mscounteren} +register analogously control access to these registers while executing +in S-mode. The same bit positions in the {\tt mucounteren} +register analogously control access to these registers +while executing in U-mode. + +Each counter-enable register must be implemented if the corresponding +privilege mode is implemented. However, any of the bits may contain +a hardwired value of zero, indicating reads to the corresponding counter will +cause an exception when executing in the corresponding privilege mode. +Hence, they are effectively \warl\ fields. +\begin{commentary} +The counter-enable bits support two common use cases with minimal hardware. +For systems that do not need high-performance timers and counters, +machine-mode software can trap accesses and implement all features in +software. For systems that need high-performance timers and counters +but are not concerned with obfuscating the underlying hardware +counters, the counters can be directly exposed to lower privilege modes. +\end{commentary} + +The {\tt cycle}, {\tt instret}, and {\tt hpmcounter{\em n}} CSRs are +read-only shadows of {\tt mcycle}, {\tt minstret}, and {\tt mhpmcounter{\em +n}}, respectively. The {\tt time} CSR is a read-only shadow of the +memory-mapped {\tt mtime} register. +\begin{commentary} +Implementations can convert reads of the {\tt time} CSR into loads to +the memory-mapped {\tt mtime} register, or hard-wire the TM bits in +{\tt m{\em x}counteren} to 0 +and emulate this functionality in M-mode software. +\end{commentary} + +\subsection{Machine Scratch Register ({\tt mscratch})} + +The {\tt mscratch} register is an XLEN-bit read/write register +dedicated for use by machine mode. Typically, it is used to hold a +pointer to a machine-mode hart-local context space and swapped with a +user register upon entry to an M-mode trap handler. 
+ +\begin{figure}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}J} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{\tt mscratch} \\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine-mode scratch register.} +\label{mscratchreg} +\end{figure} + +\begin{commentary} +The MIPS ISA allocated two user registers ({\tt k0}/{\tt k1}) for use +by the operating system. Although the MIPS scheme provides a fast and +simple implementation, it also reduces available user registers, and +does not scale to further privilege levels, or nested traps. It can +also require both registers are cleared before returning to user level +to avoid a potential security hole and to provide deterministic +debugging behavior. + +The RISC-V user ISA was designed to support many possible privileged +system environments and so we did not want to infect the user-level +ISA with any OS-dependent features. The RISC-V CSR swap instructions +can quickly save/restore values to the {\tt mscratch} register. +Unlike the MIPS design, the OS can rely on holding a value in the {\tt + mscratch} register while the user context is running. +\end{commentary} + +\subsection{Machine Exception Program Counter ({\tt mepc})} + +{\tt mepc} is an XLEN-bit read/write register formatted as shown in +Figure~\ref{mepcreg}. The low bit of {\tt mepc} ({\tt mepc[0]}) is +always zero. On implementations that do not support instruction-set +extensions with 16-bit instruction alignment, the two low bits ({\tt + mepc[1:0]}) are always zero. + +\begin{commentary} +The {\tt mepc} register can never hold a PC value that would cause an +instruction-address-misaligned exception. +\end{commentary} + +When a trap is taken, {\tt mepc} is written with the virtual address +of the instruction that encountered the exception. + +\begin{figure}[h!] 
+{\footnotesize +\begin{center} +\begin{tabular}{@{}J} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{\tt mepc} \\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine exception program counter register.} +\label{mepcreg} +\end{figure} + +\subsection{Machine Cause Register ({\tt mcause})} + +The {\tt mcause} register is an XLEN-bit read-write register formatted +as shown in Figure~\ref{mcausereg}. The Interrupt bit is set if the +trap was caused by an interrupt. The Exception Code field + contains a code identifying the last exception. Table~\ref{mcauses} +lists the possible machine-level exception codes. The Exception Code +is an \wlrl\ field, so is only guaranteed to hold supported exception +codes. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{c@{}U} +\instbit{XLEN-1} & +\instbitrange{XLEN-2}{0} \\ +\hline +\multicolumn{1}{|c|}{Interrupt} & +\multicolumn{1}{c|}{Exception Code (\wlrl)} \\ +\hline +1 & XLEN-1 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine Cause register {\tt mcause}.} +\label{mcausereg} +\end{figure*} + +\begin{table*}[h!] 
+\begin{center} +\begin{tabular}{|r|r|l|l|} + + \hline + Interrupt & Exception Code & Description \\ + \hline + 1 & 0 & User software interrupt \\ + 1 & 1 & Supervisor software interrupt \\ + 1 & 2 & Hypervisor software interrupt \\ + 1 & 3 & Machine software interrupt \\ + 1 & 4 & User timer interrupt \\ + 1 & 5 & Supervisor timer interrupt \\ + 1 & 6 & Hypervisor timer interrupt \\ + 1 & 7 & Machine timer interrupt \\ + 1 & 8 & User external interrupt \\ + 1 & 9 & Supervisor external interrupt \\ + 1 & 10 & Hypervisor external interrupt \\ + 1 & 11 & Machine external interrupt \\ + 1 & $\ge$12 & {\em Reserved} \\ \hline + 0 & 0 & Instruction address misaligned \\ + 0 & 1 & Instruction access fault \\ + 0 & 2 & Illegal instruction \\ + 0 & 3 & Breakpoint \\ + 0 & 4 & Load address misaligned \\ + 0 & 5 & Load access fault \\ + 0 & 6 & Store/AMO address misaligned \\ + 0 & 7 & Store/AMO access fault \\ + 0 & 8 & Environment call from U-mode\\ + 0 & 9 & Environment call from S-mode \\ + 0 & 10 & Environment call from H-mode \\ + 0 & 11 & Environment call from M-mode \\ + 0 & $\ge$12 & {\em Reserved} \\ + \hline + +\end{tabular} +\end{center} +\caption{Machine cause register ({\tt mcause}) values after trap.} +\label{mcauses} +\end{table*} + +\begin{commentary} +We do not distinguish privileged instruction exceptions from illegal +opcode exceptions. This simplifies the architecture and also hides +details of which higher-privilege instructions are supported by an +implementation. The privilege level servicing the trap can implement +a policy on whether these need to be distinguished, and if so, whether +a given opcode should be treated as illegal or privileged. +\end{commentary} + +\begin{commentary} +Interrupts can be separated from other traps with a single branch on the sign of +the {\tt mcause} register value. A shift left can remove the +interrupt bit and scale the exception codes to index into a trap +vector table. 
+\end{commentary} + +\subsection{Machine Bad Address ({\tt mbadaddr}) Register} + +{\tt mbadaddr} is an XLEN-bit read-write register formatted as shown in +Figure~\ref{mbadaddrreg}. When a hardware breakpoint is triggered, or an +instruction-fetch, load, or store address-misaligned or access exception +occurs, {\tt mbadaddr} is written with the faulting address. {\tt mbadaddr} is +not modified for other exceptions. + +\begin{figure}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}J} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{\tt mbadaddr} \\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Machine bad address register.} +\label{mbadaddrreg} +\end{figure} + +For instruction-fetch access faults on RISC-V systems with +variable-length instructions, {\tt mbadaddr} will point to the +portion of the instruction that caused the fault while {\tt mepc} will +point to the beginning of the instruction. + +\section{Machine-Mode Privileged Instructions} + +\subsection{Trap-Return Instructions} +\label{otherpriv} + +Instructions to return from trap are encoded under the PRIV +minor opcode. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{M@{}R@{}F@{}R@{}S} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct12} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +MRET/HRET/SRET/URET & 0 & PRIV & 0 & SYSTEM \\ +\end{tabular} +\end{center} + +To return after handling a trap, there are separate trap return +instructions per privilege level: MRET, HRET, SRET, and URET. MRET is +always provided, while HRET and SRET must be provided if the +respective privilege mode is supported. URET is only provided if +user-mode traps are supported. 
An {\em x}\,RET instruction can be +executed in privilege mode {\em x} or higher, where executing a +lower-privilege {\em x}\,RET instruction will pop the relevant +lower-privilege interrupt enable and privilege mode stack. In +addition to manipulating the privilege stack as described in +Section~\ref{privstack}, {\em x}\,RET sets the {\tt pc} to the value +stored in the {\em x}\,{\tt epc} register. + +\begin{commentary} +Previously, there was only a single ERET instruction (which was also +earlier known as SRET). To support the addition of user-level +interrupts, we needed to add a separate URET instruction to continue +to allow classic virtualization of OS code using the ERET instruction. +It then became more orthogonal to support a different {\em x}RET +instruction per privilege level (which also enables virtualization of +a hypervisor at supervisor level). +\end{commentary} + +\subsection{Wait for Interrupt} +\label{wfi} + +The Wait for Interrupt instruction (WFI) provides a hint to the +implementation that the current hart can be stalled until an interrupt +might need servicing. Execution of the WFI instruction can also be +used to inform the hardware platform that suitable interrupts should +preferentially be routed to this hart. WFI is available in all of the +supported S, H, and M privilege modes, and optionally available to +U-mode for implementations that support U-mode interrupts. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{M@{}R@{}F@{}R@{}S} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct12} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +WFI & 0 & PRIV & 0 & SYSTEM \\ +\end{tabular} +\end{center} + +If an enabled interrupt is present or later becomes present while the +hart is stalled, the interrupt exception will be taken on the +following instruction, i.e., execution resumes in the trap handler and +{\tt mepc} = {\tt pc} + 4. + +\begin{commentary} +The following instruction takes the interrupt exception and trap, so +that a simple return from the trap handler will execute code after the +WFI instruction. +\end{commentary} + +The WFI instruction is just a hint, and a legal implementation is to +implement WFI as a NOP. + +\begin{commentary} +If the implementation does not stall the hart on execution of the +instruction, then the interrupt will be taken on some instruction in +the idle loop containing the WFI, and on a simple return from the +handler, the idle loop will resume execution. +\end{commentary} + +\begin{commentary} +We have removed the earlier requirement that implementations ignore +the {\em rs1} and {\em rd} fields, so non-zero values in these fields +should now raise illegal instruction exceptions. +\end{commentary} + +The WFI instruction can also be executed when interrupts are disabled. +The operation of WFI must be unaffected by the global interrupt bits +in {\tt mstatus} (MIE/HIE/SIE/UIE) (i.e., the hart must resume if a +locally enabled interrupt becomes pending), but should honor the +individual interrupt enables (e.g., MTIE) (i.e., implementations should +avoid resuming the hart if the interrupt is pending but not +individually enabled).
WFI is also required to resume execution for +locally enabled interrupts pending at any privilege level, regardless +of the global interrupt enable at each privilege level. + +If the event that causes the hart to resume execution does not cause +an interrupt to be taken, execution will resume at {\tt pc} + 4, and +software must determine what action to take, including looping back to +repeat the WFI if there was no actionable event. + +\begin{commentary} +By allowing wakeup when interrupts are disabled, an alternate entry +point to an interrupt handler can be called that does not require +saving the current context, as the current context can be saved or +discarded before the WFI is executed. + +As implementations are free to implement WFI as a NOP, software must +explicitly check for any relevant pending but disabled interrupts in +the code following a WFI, and should loop back to the WFI if no +suitable interrupt was detected. The {\tt mip}, {\tt hip}, {\tt sip}, +or {\tt uip} registers can be interrogated to determine the presence +of any interrupt in machine, hypervisor, supervisor, or user mode +respectively. + +The operation of WFI is unaffected by the delegation register settings. + +WFI is defined so that an implementation can trap into a higher +privilege mode, either immediately on encountering the WFI or after +some interval to initiate a machine-mode transition to a lower power +state, for example. +\end{commentary} + +\begin{commentary} +The same ``wait-for-event'' template might be used for possible future +extensions that wait on memory locations changing, or message +arrival. +\end{commentary} + +\section{Reset} +\label{sec:reset} + +Upon reset, a hart's privilege mode is set to M. The {\tt mstatus} fields MIE +and MPRV are reset to 0, and the VM field is reset to Mbare. The {\tt pc} is +set to an implementation-defined reset vector. The {\tt mcause} register is +set to a value indicating the cause of the reset. All other hart state is +undefined.
+ +The {\tt mcause} values after reset have implementation-specific +interpretation, but the value 0 should be returned on implementations +that do not distinguish different reset conditions. Implementations +that distinguish different reset conditions should only use 0 to +indicate the most complete reset (e.g., hard reset). + +\begin{commentary} +Some designs may have multiple causes of reset (e.g., power-on reset, +external hard reset, brownout detected, watchdog timer elapse, +sleep-mode wakeup), which machine-mode software and debuggers may wish +to distinguish. + +{\tt mcause} reset values may alias {\tt mcause} values following +synchronous exceptions. There is no ambiguity in this overlap, since +on reset the {\tt pc} is set to a different value than on other traps. +\end{commentary} + +\section{Non-Maskable Interrupts} +\label{sec:nmi} + +Non-maskable interrupts (NMIs) are only used for hardware error +conditions, and cause an immediate jump to an implementation-defined +NMI vector running in M-mode regardless of the state of a hart's +interrupt enable bits. The {\tt mepc} register is written with the +address of the next instruction to be executed at the time the NMI was +taken, and {\tt mcause} is set to a value indicating the source of the +NMI. The NMI can thus overwrite state in an active machine-mode +interrupt handler. + +The values written to {\tt mcause} on an NMI are +implementation-defined, but a value of 0 is reserved to mean ``unknown +cause'' and implementations that do not distinguish sources of NMIs +via the {\tt mcause} register should return 0. + +Unlike resets, NMIs do not reset processor state, enabling diagnosis, +reporting, and possible containment of the hardware error. + +\section{Physical Memory Attributes} +\label{sec:pma} + +The physical memory map for a complete system includes various address +ranges, some corresponding to memory regions, some to memory-mapped +control registers, and some to empty holes in the address space. 
Some +memory regions might not support reads, writes, or execution; some +might not support subword or subblock accesses; some might not support +atomic operations; and some might not support cache coherence or might +have different memory models. Similarly, memory-mapped control +registers vary in their supported access widths, support for atomic +operations, and whether read and write accesses have associated side +effects. In RISC-V systems, these properties and capabilities of each +region of the machine's physical address space are termed {\em + physical memory attributes} (PMAs). This section describes RISC-V +PMA terminology and how RISC-V systems implement and check PMAs. + +PMAs are inherent properties of the underlying hardware and rarely +change during system operation. Unlike physical memory protection +values described in Section~\ref{sec:pmp}, PMAs do not vary by +execution context. The PMAs of some memory regions are fixed at chip +design time---for example, for an on-chip ROM. Others are fixed at +board design time, depending, for example, on which other chips are +connected to off-chip buses. Off-chip buses might also support +devices that could be changed on every power cycle (cold pluggable) or +dynamically while the system is running (hot pluggable). Some devices +might be configurable at run time to support different uses that imply +different PMAs---for example, an on-chip scratchpad RAM might be +cached privately by one core in one end-application, or accessed as a +shared non-cached memory in another end-application. + +Most systems will require that at least some PMAs are dynamically +checked in hardware later in the execution pipeline after the physical +address is known, as some operations will not be supported at all +physical memory addresses, and some operations require knowing the +current setting of a configurable PMA attribute. 
While many other systems +specify some PMAs in the virtual memory page tables and use the TLB to +inform the pipeline of these properties, this approach injects platform-specific +information into a virtualized layer and can cause system errors +unless attributes are correctly initialized in each page-table entry +for each physical memory region. In addition, the available +page sizes might not be optimal for specifying attributes in the +physical memory space, leading to address-space fragmentation and +inefficient use of expensive TLB entries. + +For RISC-V, we separate out specification and checking of PMAs into a +separate hardware structure, the {\em PMA checker}. In many cases, +the attributes are known at system design time for each physical +address region, and can be hardwired into the PMA checker. Where the +attributes are run-time configurable, platform-specific memory-mapped +control registers can be provided to specify these attributes at a +granularity appropriate to each region on the platform (e.g., for an +on-chip SRAM that can be flexibly divided between cacheable and +uncacheable uses). PMAs are checked for any access to physical +memory, including accesses that have undergone virtual to physical +memory translation. To aid in system debugging, we strongly recommend +that, where possible, RISC-V processors precisely trap physical memory +accesses that fail PMA checks. Precise PMA traps might not always be +possible, for example, when probing a legacy bus architecture that +uses access failures as part of the discovery mechanism. In this +case, error responses from slave devices will be reported as imprecise +bus-error interrupts. + +PMAs must also be readable by software to correctly access certain +devices or to correctly configure other hardware components that +access memory, such as DMA engines. 
As PMAs are tightly tied to a +given physical platform's organization, many details are inherently +platform-specific, as is the means by which software can learn the PMA +values for a platform. The configuration string +(Chapter~\ref{cfgstr}) can encode PMAs for on-chip devices and might +also describe on-chip controllers for off-chip buses that can be +dynamically interrogated to discover attached device PMAs. Some +devices, particularly legacy buses, do not support discovery of PMAs +and so will give error responses or time out if an unsupported access +is attempted. Typically, platform-specific machine-mode code will +extract PMAs and ultimately present this information to higher-level +less-privileged software using some standard representation. + +Where platforms support dynamic reconfiguration of PMAs, an interface +will be provided to set the attributes by passing requests to a +machine-mode driver that can correctly reconfigure the platform. For +example, switching cacheability attributes on some memory regions +might involve platform-specific operations, such as cache flushes, +that are available only to machine-mode. + +\subsection{Main Memory versus I/O versus Empty Regions} + +The most important characterization of a given memory address range is +whether it holds regular main memory, or I/O devices, or is empty. +Regular main memory is required to have a number of properties, +specified below, whereas I/O devices can have a much broader range of +attributes. Memory regions that do not fit into regular main +memory, for example, device scratchpad RAMs, are categorized as I/O +regions. Empty regions are also classified as I/O regions but with +attributes specifying that no accesses are supported. + +\subsection{Supported Access Type PMAs} + +Access types specify which access widths, from 8-bit byte to long +multi-word burst, are supported, and also whether misaligned accesses +are supported for each access width. 
+ +\begin{commentary} +Although software running on a RISC-V hart cannot directly generate +bursts to memory, software might have to program DMA engines to access +I/O devices and might therefore need to know which access sizes are +supported. +\end{commentary} + +Main memory regions always support read, write, and execute of all +access widths required by the attached devices. + +\begin{commentary} +In some cases, the design of a processor or device accessing main +memory might support other widths, but must be able to function with +the types supported by the main memory. +\end{commentary} + +I/O regions can specify which combinations of read, write, or execute +accesses to which data widths are supported. + +\subsection{Atomicity PMAs} + +Atomicity PMAs describe which atomic instructions are supported in +this address region. Main memory regions must support the atomic +operations required by the processors attached. I/O regions may only +support a subset or none of the processor-supported atomic operations. + +Support for atomic instructions is divided into two categories: {\em + LR/SC} and {\em AMOs}. Within AMOs, there are four levels of +support: {\em AMONone}, {\em AMOSwap}, {\em AMOLogical}, and {\em + AMOArithmetic}. AMONone indicates that no AMO operations are +supported. AMOSwap indicates that only {\tt amoswap} instructions are +supported in this address range. AMOLogical indicates that swap +instructions plus all the logical AMOs ({\tt amoand}, {\tt amoor}, +{\tt amoxor}) are supported. AMOArithmetic indicates that all RISC-V +AMOs are supported. For each level of support, naturally aligned AMOs +of a given width are supported if the underlying memory region +supports reads and writes of that width. + +\begin{table*}[h!]
+\begin{center} +\begin{tabular}{|l|l|} + \hline + AMO Class & Supported Operations \\ + \hline + AMONone & {\em None} \\ + AMOSwap & {\tt amoswap} \\ + AMOLogical & above + {\tt amoand}, {\tt amoor}, {\tt amoxor} \\ + AMOArithmetic & above + {\tt amoadd}, {\tt amomin}, {\tt amomax}, {\tt amominu}, {\tt amomaxu} \\ + \hline +\end{tabular} +\end{center} +\caption{Classes of AMOs supported by I/O regions. Main memory + regions must always support all AMOs required by the processor.} +\label{amoclasses} +\end{table*} + +\begin{commentary} +We recommend providing at least AMOLogical support for I/O regions +where possible. Most I/O regions will not support LR/SC accesses, as +these are most conveniently built on top of a cache-coherence scheme. +\end{commentary} + +\subsection{Memory-Ordering PMAs} + +Regions of the address space are classified as either {\em main + memory} or {\em I/O} for the purposes of ordering by the FENCE +instruction and atomic-instruction ordering bits. + +Accesses by one hart to main memory regions are observable not only by +other harts but also by other devices with the capability to initiate +requests in the main memory system (e.g., DMA engines). Main memory +regions always have the standard RISC-V relaxed memory model. + +Accesses by one hart to the I/O space are observable not only by other +harts and bus mastering devices, but also by targeted slave I/O +devices. Within I/O, regions may further be classified as +implementing either {\em relaxed} or {\em strong} ordering. A relaxed +I/O region has no ordering guarantees on how memory accesses made by +one hart are observable by different harts or I/O devices beyond those +enforced by FENCE and AMO instructions. A strongly ordered I/O region +ensures that all accesses made by a hart to that region are only +observable in program order by all other harts or I/O devices. 
+ +Each strongly ordered I/O region specifies a numbered ordering +channel, which is a mechanism by which ordering guarantees can be +provided between different I/O regions. Channel 0 is used to indicate +point-to-point strong ordering only, where only accesses by the hart to the +single associated I/O region are strongly ordered. + +Channel 1 is used to provide global strong ordering across all I/O +regions. Any accesses by a hart to any I/O region associated with +channel 1 can only be observed to have occurred in program order by all +other harts and I/O devices, including relative to accesses made by +that hart to relaxed I/O regions or strongly ordered I/O regions with +different channel numbers. In other words, any access to a region in +channel 1 is equivalent to executing a {\tt fence io,io} +instruction before and after the instruction. + +Other larger channel numbers provide program ordering to accesses by +that hart across any regions with the same channel number. + +Systems might support dynamic configuration of ordering properties on +each memory region. + +\begin{commentary} +Strong ordering can be used to improve compatibility with legacy +device driver code, or to enable increased performance compared to +insertion of explicit ordering instructions when the implementation is +known to not reorder accesses. + +Local strong ordering (channel 0) is the default form of strong +ordering as it is often straightforward to provide if there is only a +single in-order communication path between the hart and the I/O +device. + +Generally, different strongly ordered I/O regions can share the same +ordering channel without additional ordering hardware if they share +the same interconnect path and the path does not reorder requests. 
+\end{commentary} + +\subsection{Coherence and Cacheability PMAs} + +Coherence is a property defined for a single physical address, and +indicates that writes to that address by one agent will eventually be +made visible to other agents in the system. Coherence is not to be +confused with the memory consistency model of a system, which defines +what values a memory read can return given the previous history of +reads and writes to the entire memory system. In RISC-V platforms, +the use of hardware-incoherent regions is discouraged due to software +complexity, performance, and energy impacts. + +The cacheability of a memory region should not affect the software +view of the region except for differences reflected in other PMAs, +such as main memory versus I/O classification, memory ordering, +supported accesses and atomic operations, and coherence. For this +reason, we treat cacheability as a platform-level setting managed by +machine-mode software only. + +Where a platform supports configurable cacheability settings for a +memory region, a platform-specific machine-mode routine will change +the settings and flush caches if necessary, so the system is only +incoherent during the transition between cacheability settings. This +transitory state should not be visible to lower privilege levels. + +\begin{commentary} +We categorize RISC-V caches into three types: {\em master-private}, +{\em shared}, and {\em slave-private}. Master-private caches are +attached to a single master agent, i.e., one that issues read/write +requests to the memory system. Shared caches are located in between +masters and slaves and may be hierarchically organized. Slave-private +caches do not impact coherence, as they are local to a single slave +and do not affect other PMAs at a master, so are not considered +further here. We use {\em private cache} to mean a master-private +cache in the following section, unless explicitly stated otherwise.
+ +Coherence is straightforward to provide for a shared memory region +that is not cached by any agent. The PMA for such a region would +simply indicate it should not be cached in a private or shared cache. + +Coherence is also straightforward for read-only regions, which can be +safely cached by multiple agents without requiring a cache-coherence +scheme. The PMA for this region would indicate that it can be cached, +but that writes are not supported. + +Some read-write regions might only be accessed by a single agent, in +which case they can be cached privately by that agent without +requiring a coherence scheme. The PMA for such regions would indicate +they can be cached. The data can also be cached in a shared cache, as +other agents should not access the region. + +If an agent can cache a read-write region that is accessible by other +agents, whether caching or non-caching, a cache-coherence scheme is +required to avoid use of stale values. In regions lacking hardware +cache coherence (hardware-incoherent regions), cache coherence can be +implemented entirely in software, but software coherence schemes are +notoriously difficult to implement correctly and often have severe +performance impacts due to the need for conservative software-directed +cache-flushing. Hardware cache-coherence schemes require more complex +hardware and can impact performance due to the cache-coherence probes, +but are otherwise invisible to software. + +For each hardware cache-coherent region, the PMA would indicate that +the region is coherent and which hardware coherence controller to use +if the system has multiple coherence controllers. For some systems, +the coherence controller might be an outer-level shared cache, which +might itself access further outer-level cache-coherence controllers +hierarchically. 
+ +Most memory regions within a platform will be coherent to software, +because they will be fixed as either uncached, read-only, hardware +cache-coherent, or only accessed by one agent. +\end{commentary} + +\subsection{Idempotency PMAs} + +Idempotency PMAs describe whether reads and writes to an address +region are idempotent. Main memory regions are assumed to be +idempotent. For I/O regions, idempotency on reads and writes can be +specified separately (e.g., reads are idempotent but writes are not). +If accesses are non-idempotent, i.e., there is potentially a side +effect on any read or write access, then speculative or redundant +accesses must be avoided. + +For the purposes of defining the idempotency PMAs, changes in observed +memory ordering created by redundant accesses are not considered a +side effect. + +\begin{commentary} +While hardware should always be designed to avoid speculative or +redundant accesses to memory regions marked as non-idempotent, it is +also necessary to ensure software or compiler optimizations do not +generate spurious accesses to non-idempotent memory regions. +\end{commentary} + +\section{Physical Memory Protection} +\label{sec:pmp} + +To support secure processing and contain faults, it is desirable to +limit the physical addresses accessible by a lower-privilege context +running on a hart. A physical memory protection (PMP) unit can be +provided, with per-hart machine-mode control registers to allow +physical memory access privileges (read, write, execute) to be +specified for each physical memory region. The PMP values are checked +in parallel with the PMA checks described in Section~\ref{sec:pma}. + +The granularity and encoding of the PMP access control settings are +platform-specific, and there might be different granularities and +encodings of permissions for different physical memory regions on a +single platform. 
Certain regions' privileges can be hardwired---for +example, some regions might only ever be visible in machine mode but +no lower-privilege layers. + +\begin{commentary} +Platforms vary widely in demands for physical memory protection, and +so we defer detailed design of PMP structures to each platform. Some +PMP designs might just employ a few CSRs to protect a small number of +physical memory segments, while others might employ memory-resident +protection tables with a protection-table cache indexed by a +protection-table base register to protect large physical memory spaces +with fine granularity. Systems with a protection-table base register +will usually also provide a physical protection domain ID (PDID) +register to denote the current physical protection domain. +\end{commentary} + +PMP checks are applied to all accesses when the hart is running in H, +S, or U modes, and for loads and stores when the MPRV bit is set in +the {\tt mstatus} register and the MPP field in the {\tt mstatus} +register contains H, S, or U. PMP violations will always be trapped +precisely at the processor. + +\section{Mbare addressing environment} +\label{mbare} + +The Mbare environment is entered at reset, or can be entered at any +time thereafter by writing 0 to the VM field in the {\tt mstatus} +register. + +In the Mbare environment all virtual addresses are converted with no +translation into physical addresses, with truncation of any excess +high-order bits. Physical memory protection, as described in +Section~\ref{sec:pmp}, can be used to constrain accesses by +lower-privilege modes. + +\section{Base-and-Bound environments} +\label{bb} + +This section describes the Mbb virtualization environment, which +provides a base-and-bound translation and protection scheme. There +are two variants of base-and-bound, Mbb and Mbbid, depending on +whether there is a single base-and-bound (Mbb) or separate +base-and-bounds for instruction fetches and data accesses (Mbbid). 
+This simple translation and protection scheme has the advantage of low +complexity and deterministic high performance, as there are never any +TLB misses during operation. + +\subsection{Mbb: Single Base-and-Bound registers ({\tt mbase}, {\tt mbound})} +\label{mbb} + +\begin{figure}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}J} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{\tt mbase} \\ +\hline +\multicolumn{1}{|c|}{\tt mbound} \\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Single Base-and-Bound Registers.} +\label{sbbregs} +\end{figure} + +The simpler Mbb system has a single base {\tt mbase} and single bound +{\tt mbound} register. Mbb is enabled by writing the value 1 to the +VM field in the {\tt mstatus} register. + +The base-and-bound registers define a contiguous virtual-address +segment beginning at virtual address 0 with a length given in bytes by +the value in {\tt mbound}. This virtual address segment is mapped to +a contiguous physical address segment starting at the physical address +given in the {\tt mbase} register. + +When Mbb is in operation, all lower-privilege mode (U, S, H) +instruction-fetch addresses and data addresses are translated by +adding the value of {\tt mbase} to the virtual address to obtain the +physical address. Simultaneously, the virtual address is compared +against the value in the {\tt mbound} register. An address fault exception is +generated if the virtual address is equal to or greater than the +virtual address limit held in the {\tt mbound} register. + +Machine-mode instruction fetch and data accesses are not translated or +checked in Mbb (except for loads and stores when the MPRV bit is set +in {\tt mstatus}), so machine-mode effective addresses are treated as +physical addresses. + +\subsection{Mbbid: Separate Instruction and Data Base-and-Bound registers} + +\begin{figure}[h!] 
+{\footnotesize +\begin{center} +\begin{tabular}{@{}J} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{\tt mibase} \\ +\hline +\multicolumn{1}{|c|}{\tt mibound} \\ +\hline +\multicolumn{1}{|c|}{\tt mdbase} \\ +\hline +\multicolumn{1}{|c|}{\tt mdbound} \\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Separate instruction and data base-and-bound registers.} +\label{sbbidregs} +\end{figure} + +The Mbbid scheme separates the virtual address segments for +instruction fetches and data accesses to allow a single physical +instruction segment to be shared by two or more user-level virtual +address spaces while a separate data segment is allocated to each. +Mbbid is enabled by writing 2 to the VM field of the {\tt mstatus} +register. + +\begin{commentary} +The split instruction and data base-and-bounds scheme was famously +used on Cray supercomputers, where it avoids most runtime overheads +related to translation and protection provided the segments fit in +physical memory. +\end{commentary} + +The {\tt mibase} and {\tt mibound} registers define the physical start +address and length of the instruction segment respectively, while {\tt + mdbase} and {\tt mdbound} specify the physical start address and +length of the data segment respectively. + +The data virtual address segment begins at address 0, while the +instruction virtual address segment begins half way through the +virtual address space, at an address given by a leading 1 followed by +XLEN-1 trailing zeros (e.g., {\tt 0x8000\_0000} for 32-bit address +space systems). The virtual addresses of lower privilege-mode +instruction fetches are first checked to ensure their high bit is set; +if not, an exception is generated. The high bit is subsequently +treated as zero when adding the base to the virtual address and when +checking the bound.
+ +\begin{commentary} +The data and instruction virtual address segments should not overlap, +and we felt it more important to preserve the potential of zero page +data accesses (using a 12-bit offset from register {\tt x0}) than to +support instruction entry points using JALR with {\tt x0}. In +particular, a single JAL can directly access all of a \wunits{2}{MiB} +code segment. + +To simplify linking, the instruction virtual address segment start +address should be constant independent of the length of the complete +binary. Placing at the midpoint of virtual memory minimizes the +circuitry needed to separate the two segments. +\end{commentary} + +Systems that provide Mbbid must also provide Mbb. Writes to the CSR +addresses corresponding to {\tt mbase} should write the same value to +{\tt mibase} \& {\tt mdbase}, and writes to {\tt mbound} should write +the same value to {\tt mibound} \& {\tt mdbound} to provide compatible +behavior. Reads of {\tt mbase} should return the value in {\tt + mdbase} and reads of {\tt mbound} should return the value in {\tt + mdbound}. When VM is set to Mbb, instruction fetches no longer check +the high bit of the virtual address, and no longer reset the high +bit to zero before adding base and checking bound. + +\begin{commentary} +While the split scheme allows a single physical instruction segment to +be shared across multiple user process instances, it also effectively +prevents the instruction segment from being written by the user +program (data stores are translated separately) and prevents execution +of instructions from the data segment (instruction fetches are +translated separately). These restrictions can prevent some forms of +security attack. + +On the other hand, many modern programming systems require, or benefit +from, some form of runtime-generated code, and so these should use the +simpler Mbb mode with a single segment, which is partly why supporting +this mode is required if providing Mbbid. 
+\end{commentary} + diff --git a/src/naming.tex b/src/naming.tex new file mode 100644 index 0000000..ac649f2 --- /dev/null +++ b/src/naming.tex @@ -0,0 +1,143 @@ +\chapter{ISA Subset Naming Conventions} +\label{naming} + +This chapter describes the RISC-V ISA subset naming scheme that is +used to concisely describe the set of instructions present in a +hardware implementation, or the set of instructions used by an +application binary interface (ABI). + +\begin{commentary} +The RISC-V ISA is designed to support a wide variety of +implementations with various experimental instruction-set extensions. +We have found that an organized naming scheme simplifies software +tools and documentation. +\end{commentary} + +\section{Case Sensitivity} + +The ISA naming strings are case insensitive. + +\section{Base Integer ISA} +RISC-V ISA strings begin with either RV32I, RV32E, RV64I, or RV128I +indicating the supported address space size in bits for the base +integer ISA. + +\section{Instruction Extensions Names} + +Standard ISA extensions are given a name consisting of a single +letter. For example, the first four standard +extensions to the integer bases are: +``M'' for integer multiplication and division, +``A'' for atomic memory instructions, +``F'' for single-precision floating-point instructions, and +``D'' for double-precision floating-point instructions. +Any RISC-V instruction set variant can be succinctly described by +concatenating the base integer prefix with the names of the included +extensions. For example, ``RV64IMAFD''. + +We have also defined an abbreviation ``G'' to represent the ``IMAFD'' +base and extensions, as this is intended to represent our standard +general-purpose ISA. + +Standard extensions to the RISC-V ISA are given other reserved +letters, e.g., ``Q'' for quad-precision floating-point, or +``C'' for the 16-bit compressed instruction format. 
+ +\section{Version Numbers} +Recognizing that instruction sets may expand or alter over time, we +encode subset version numbers following the subset name. Version +numbers are divided into major and minor version numbers, separated by +a ``p''. If the minor version is ``0'', then ``p0'' can be omitted +from the version string. Changes in major version numbers imply a +loss of backwards compatibility, whereas changes in only the minor +version number must be backwards-compatible. For example, the +original 64-bit standard ISA defined in release 1.0 of this manual can +be written in full as ``RV64I1p0M1p0A1p0F1p0D1p0'', more concisely as +``RV64I1M1A1F1D1'', or even more concisely as ``RV64G1''. The G ISA +subset can be written as ``RV64I2p0M2p0A2p0F2p0D2p0'', or more +concisely ``RV64G2''. + +We introduced the version numbering scheme with the second release, +which we also intend to become a permanent standard. Hence, we define +the default version of a standard subset to be that present at the +time of this document, e.g., ``RV32G'' is equivalent to +``RV32I2M2A2F2D2''. + +\section{Non-Standard Extension Names} + +Non-standard subsets are named using a single ``X'' followed by a name +beginning with a letter and an optional version number. +For example, ``Xhwacha'' names the Hwacha vector-fetch ISA extension; +``Xhwacha2'' and ``Xhwacha2p0'' name version 2.0 of same. + +Non-standard extensions must be separated from other multi-letter extensions +by a single underscore. For example, an ISA with non-standard extensions +Argle and Bargle may be named ``RV64GXargle\_Xbargle''. + +\section{Supervisor-level Instruction Subsets} +Standard supervisor instruction subsets are defined in Volume II, but +are named using ``S'' as a prefix, followed by a supervisor subset name +beginning with a letter and an optional version number. + +Supervisor extensions must be separated from other multi-letter extensions +by a single underscore. 
+ +\section{Supervisor-level Extensions} +Non-standard extensions to the supervisor-level ISA are defined using +the ``SX'' prefix. + +\section{Subset Naming Convention} +Table~\ref{isanametable} summarizes the standardized subset names. +~\\ +\begin{table}[h] +\center +\begin{tabular}{|l|c|} +\hline +Subset & Name \\ +\hline +\hline +\multicolumn{2}{|c|}{Standard General-Purpose ISA}\\ +\hline +Integer & I \\ +Integer Multiplication and Division & M \\ +Atomics & A \\ +Single-Precision Floating-Point & F \\ +Double-Precision Floating-Point & D \\ +\hline +General & G = IMAFD \\ +\hline +\multicolumn{2}{|c|}{Standard User-Level Extensions}\\ +\hline +Quad-Precision Floating-Point & Q \\ +Decimal Floating-Point & L \\ +16-bit Compressed Instructions & C \\ +Bit Manipulation & B \\ +Transactional Memory & T \\ +Packed-SIMD Extensions & P \\ +Vector Extensions & V \\ +User-Level Interrupts & N \\ +\hline +\hline +\multicolumn{2}{|c|}{Non-Standard User-Level Extensions}\\ +\hline +Non-standard extension ``abc'' & Xabc \\ +\hline +\hline +\multicolumn{2}{|c|}{Standard Supervisor-Level ISA}\\ +\hline +Supervisor extension ``def'' & Sdef \\ +\hline +\hline +\multicolumn{2}{|c|}{Non-Standard Supervisor-Level Extensions}\\ +\hline +Supervisor extension ``ghi'' & SXghi \\ +\hline +\end{tabular} +\caption{Standard ISA subset names. 
The table also defines the + canonical order in which subset names must appear in the name + string, with top-to-bottom in table indicating first-to-last in the + name string, e.g., RV32IMAFDQC is legal, whereas RV32IMAFDCQ is not.} +\label{isanametable} +\end{table} + + diff --git a/src/opcode-map.tex b/src/opcode-map.tex new file mode 100644 index 0000000..a3224c1 --- /dev/null +++ b/src/opcode-map.tex @@ -0,0 +1,22 @@ +\vspace{0.1in} +\definecolor{gray}{RGB}{180,180,180} +\begin{table*}[htbp] +\begin{center} +{\footnotesize +\setlength{\tabcolsep}{4pt} +\begin{tabular}{|r|c|c|c|c|c|c|c|c|} + \hline + inst[4:2] & 000 & 001 & 010 & 011 & 100 & 101 & 110 & \cellcolor{gray}111 \\ \cline{1-1} + inst[6:5] & & & & & & & & \cellcolor{gray}($>32b$) \\ \hline + 00 & LOAD & LOAD-FP & {\em custom-0} & MISC-MEM & OP-IMM & AUIPC & OP-IMM-32 & \cellcolor{gray} $48b$\\ \hline + 01 & STORE & STORE-FP & {\em custom-1} & AMO & OP & LUI & OP-32 & \cellcolor{gray} $64b$ \\ \hline + 10 & MADD & MSUB & NMSUB & NMADD & OP-FP & {\em reserved} & {\em custom-2/rv128} & \cellcolor{gray} $48b$\\ \hline + 11 & BRANCH & JALR & {\em reserved} & JAL & SYSTEM & {\em reserved} & {\em custom-3/rv128} & \cellcolor{gray} $\geq80b$\\ \hline + + \end{tabular} +} +\end{center} +\vspace{-0.15in} +\caption{RISC-V base opcode map, inst[1:0]=11} +\label{opcodemap} +\end{table*} diff --git a/src/p.tex b/src/p.tex new file mode 100644 index 0000000..f66acf7 --- /dev/null +++ b/src/p.tex @@ -0,0 +1,92 @@ +\chapter{``P'' Standard Extension for Packed-SIMD Instructions, + Version 0.1} +\label{sec:packedsimd} + +In this chapter, we outline a standard packed-SIMD extension for +RISC-V. We've reserved the instruction subset name ``P'' for a future +standard set of packed-SIMD extensions. Many other extensions can +build upon a packed-SIMD extension, taking advantage of the wide data +registers and datapaths separate from the integer unit. 
+ +\begin{commentary} +Packed-SIMD extensions, first introduced with the Lincoln Labs TX-2~\cite{tx2}, +have become a popular way to provide higher throughput on data-parallel +codes. Earlier commercial microprocessor implementations include the +Intel i860, HP PA-RISC MAX~\cite{lee-max-ieeemicro1996}, SPARC +VIS~\cite{tremblay-vis-ieeemicro1996}, MIPS +MDMX~\cite{gwennap-mdmx-mpr1996}, PowerPC +AltiVec~\cite{diefendorff-altivec-ieeemicro2000}, Intel x86 +MMX/SSE~\cite{peleg-mmx-ieeemicro1996, raman-sse-ieeemicro2000}, while +recent designs include Intel x86 AVX~\cite{lomont-avx-irm2011} and ARM +Neon~\cite{goodacre-armisa-computer2005}. We describe a standard +framework for adding packed-SIMD in this chapter, but are not actively +working on such a design. In our opinion, packed-SIMD designs represent +a reasonable design point when reusing existing wide datapath resources, +but if significant additional resources are to be devoted to +data-parallel execution then designs based on traditional vector +architectures are a better choice and should use the V extension. + +\end{commentary} + +A RISC-V packed-SIMD extension reuses the floating-point registers +({\tt f0}-{\tt f31}). These registers can be defined to have widths +of FLEN=32 to FLEN=1024. The standard floating-point instruction +subsets require registers of width 32 bits (``F''), 64 bits (``D''), +or 128 bits (``Q''). + +\begin{commentary} +It is natural to use the floating-point registers for packed-SIMD +values rather than the integer registers (PA-RISC and Alpha +packed-SIMD extensions) as this frees the integer registers for +control and address values, simplifies reuse of scalar floating-point +units for SIMD floating-point execution, and leads naturally to a +decoupled integer/floating-point hardware design. The floating-point +load and store instruction encodings also have space to handle wider +packed-SIMD registers. 
However, reusing the floating-point registers +for packed-SIMD values does make it more difficult to use a recoded +internal format for floating-point values. +\end{commentary} + +The existing floating-point load and store instructions are used to +load and store various-sized words from memory to the {\tt f} +registers. The base ISA supports 32-bit and 64-bit loads and stores, +but the LOAD-FP and STORE-FP instruction encodings allow 8 different +widths to be encoded as shown in Table~\ref{psimdwidth}. When used +with packed-SIMD operations, it is desirable to support non-naturally +aligned loads and stores in hardware. + +\begin{table}[htp] +\begin{center} +\begin{tabular}{|c|l|r|} +\hline +{\em width} field & +Code & +Size in bits\\ +\hline +000 & B & 8 \\ +001 & H & 16 \\ +010 & W & 32 \\ +011 & D & 64 \\ +100 & Q & 128 \\ +101 & Q2 & 256 \\ +110 & Q4 & 512 \\ +111 & Q8 & 1024 \\ +\hline +\end{tabular} +\end{center} +\caption{LOAD-FP and STORE-FP width encoding.} +\label{psimdwidth} +\end{table} + +Packed-SIMD computational instructions operate on packed values in +{\tt f} registers. Each value can be 8-bit, 16-bit, 32-bit, 64-bit, +or 128-bit, and both integer and floating-point representations can be +supported. For example, a 64-bit packed-SIMD extension can treat each +register as 1$\times$64-bit, 2$\times$32-bit, 4$\times$16-bit, or +8$\times$8-bit packed values. + +\begin{commentary} +Simple packed-SIMD extensions might fit in unused 32-bit instruction +opcodes, but more extensive packed-SIMD extensions will likely require +a dedicated 30-bit instruction space.
+\end{commentary} diff --git a/src/plic.tex b/src/plic.tex new file mode 100644 index 0000000..798b139 --- /dev/null +++ b/src/plic.tex @@ -0,0 +1,427 @@ +\chapter{Platform-Level Interrupt Controller (PLIC)} +\label{plic} + +This chapter describes the general architecture for the RISC-V +platform-level interrupt controller (PLIC), which prioritizes and +distributes global interrupts in a RISC-V system. + +\section{PLIC Overview} + +Figure~\ref{fig:plic} provides a quick overview of PLIC operation. +The PLIC connects global {\em interrupt sources}, which are usually +I/O devices, to {\em interrupt targets}, which are usually {\em hart + contexts}. The PLIC contains multiple {\em interrupt gateways}, one +per interrupt source, together with a {\em PLIC core} that performs +interrupt prioritization and routing. Global interrupts are sent from +their source to an {\em interrupt gateway} that processes the +interrupt signal from each source and sends a single {\em interrupt + request} to the PLIC core, which latches these in the core interrupt +pending bits (IP). Each interrupt source is assigned a separate +priority. The PLIC core contains a matrix of interrupt enable (IE) +bits to select the interrupts that are enabled for each target. The +PLIC core forwards an {\em interrupt notification} to one or more +targets if the targets have any pending interrupts enabled, and the +priority of the pending interrupts exceeds a per-target threshold. +When the target takes the external interrupt, it sends an {\em + interrupt claim} request to retrieve the identifier of the +highest-priority global interrupt source pending for that target from +the PLIC core, which then clears the corresponding interrupt source +pending bit. After the target has serviced the interrupt, it sends +the associated interrupt gateway an {\em interrupt completion} message +and the interrupt gateway can now forward another interrupt request +for the same source to the PLIC. 
The rest of this chapter describes +each of these components in detail, though many details are +necessarily platform specific. + +\begin{figure}[tb] +\centering +\includegraphics[width=\textwidth]{figs/PLIC-block-diagram.pdf} +\caption{Platform-Level Interrupt Controller (PLIC) conceptual block + diagram. The figure shows the first two of potentially many + interrupt sources, and the first two of potentially many interrupt + targets. The figure is just intended to show the logic of the + PLIC's operation, not to represent a realistic implementation + strategy.} +\label{fig:plic} +\end{figure} + +\section{Interrupt Sources} + +RISC-V harts can have both local and global interrupt sources. Only +global interrupt sources are handled by the PLIC. + +\subsection{Local Interrupt Sources} + +Each hart has a number of {\em local interrupt sources} that do not +pass through the PLIC, including the standard software interrupts and +timer interrupts for each privilege level. Local interrupts can be +serviced quickly since there will be minimal latency between the +source and the servicing hart, no arbitration is required to determine +which hart will service the request, and the servicing hart can +quickly determine the interrupt source using the {\tt mcause} +register. + +All local interrupts follow a level-based model, where an interrupt is +pending if the corresponding bit in {\tt mip} is set. The interrupt +handler must clear the hardware condition that is causing the {\tt + mip} bit to be set to avoid retaking the interrupt after re-enabling +interrupts on exit from the interrupt handler. + +Additional non-standard local interrupt sources can be made visible to +machine-mode by adding them to the high bits of the {\tt mip}/{\tt + mie} registers, with corresponding additional cause values returned +in the {\tt mcause} register. 
These additional non-standard local +interrupts may also be made visible to lower privilege levels, using +the corresponding bits in the {\tt mideleg} register. The priority of +non-standard local interrupt sources relative to external, timer, and +software interrupts is platform-specific. + +\subsection{Global Interrupt Sources} + +{\em Global interrupt sources} are those that are prioritized and +distributed by the PLIC. Depending on the platform-specific PLIC +implementation, any global interrupt source could be routed to any +hart context. + +Global interrupt sources can take many forms, including +level-triggered, edge-triggered, and message-signalled. Some sources +might queue up a number of interrupt requests. All global interrupt +sources are converted to a common interrupt request format for the +PLIC. + +\section{Interrupt Targets and Hart Contexts} + +Interrupt targets are usually hart contexts, where a hart context is a +given privilege mode on a given hart (though there are other possible +interrupt targets, such as DMA engines). Not all hart contexts need +be interrupt targets, in particular, if a processor core does not +support delegating external interrupts to lower-privilege modes, then +the lower-privilege hart contexts will not be interrupt targets. +Interrupt notifications generated by the PLIC appear in the {\tt + meip}/{\tt heip}/{\tt seip}/{\tt ueip} bits of the {\tt mip}/{\tt + hip}/{\tt sip}/{\tt uip} registers for M/H/S/U modes respectively. +The notifications only appear in lower-privilege {\em x}{\tt ip} +registers if external interrupts have been delegated to the +lower-privilege modes. + +Each processor core must define a policy on how simultaneous active +interrupts are taken by multiple hart contexts on the core. For the +simple case of a single stack of hart contexts, one for each supported +privileged mode, interrupts for higher-privilege contexts can preempt +execution of interrupt handlers for lower-privilege contexts. 
A +multithreaded processor core could run multiple independent interrupt +handlers on different hart contexts at the same time. A processor +core could also provide hart contexts that are only used for interrupt +handling to reduce interrupt service latency, and these might preempt +interrupt handlers for other harts on the same core. + +The PLIC treats each interrupt target independently and does not take +into account any interrupt prioritization scheme used by a component +that contains multiple interrupt targets. As a result, the PLIC +provides no concept of interrupt preemption or nesting so this must be +handled by the cores hosting multiple interrupt target contexts. + +\section{Interrupt Gateways} + +The interrupt gateways are responsible for converting global interrupt +signals into a common interrupt request format, and for controlling +the flow of interrupt requests to the PLIC core. At most one +interrupt request per interrupt source can be pending in the PLIC core +at any time, indicated by setting the source's IP bit. The gateway +only forwards a new interrupt request to the PLIC core after receiving +notification that the interrupt handler servicing the previous +interrupt request from the same source has completed. + +If the global interrupt source uses level-sensitive interrupts, the +gateway will convert the first assertion of the interrupt level into +an interrupt request, but thereafter the gateway will not forward an +additional interrupt request until it receives an interrupt completion +message. On receiving an interrupt completion message, if the +interrupt is level-triggered and the interrupt is still asserted, a +new interrupt request will be forwarded to the PLIC core. The gateway +does not have the facility to retract an interrupt request once +forwarded to the PLIC core. 
If a level-sensitive interrupt source +deasserts the interrupt after the PLIC core accepts the request and +before the interrupt is serviced, the interrupt request remains +present in the IP bit of the PLIC core and will be serviced by a +handler, which will then have to determine that the interrupt device +no longer requires service. + +If the global interrupt source is edge-triggered, the gateway will +convert the first matching signal edge into an interrupt request. +Depending on the design of the device and the interrupt handler, +in between sending an interrupt request and receiving notice of its +handler's completion, the gateway might either ignore additional +matching edges or increment a counter of pending interrupts. In +either case, the next interrupt request will not be forwarded to the +PLIC core until the previous completion message has been received. If +the gateway has a pending interrupt counter, the counter will be +decremented when the interrupt request is accepted by the PLIC core. + +Unlike dedicated-wire interrupt signals, message-signalled interrupts +(MSIs) are sent over the system interconnect via a message packet that +describes which interrupt is being asserted. The message is decoded +to select an interrupt gateway, and the relevant gateway then handles +the MSI similar to an edge-triggered interrupt. + +\section{Interrupt Identifiers (IDs)} + +Global interrupt sources are assigned small unsigned integer +identifiers, beginning at the value 1. An interrupt ID of 0 is +reserved to mean ``no interrupt''. + +Interrupt identifiers are also used to break ties when two or more +interrupt sources have the same assigned priority. Smaller values of +interrupt ID take precedence over larger values of interrupt ID. + +\section{Interrupt Priorities} + +Interrupt priorities are small unsigned integers, with a +platform-specific maximum number of supported levels.
The priority +value 0 is reserved to mean ``never interrupt'', and interrupt +priority increases with increasing integer values. + +Each global interrupt source has an associated interrupt priority held +in a platform-specific memory-mapped register. Different interrupt +sources need not support the same set of priority values. A valid +implementation can hardwire all input priority levels. Interrupt +source priority registers should be \warl\ fields to allow software to +determine the number and position of read-write bits in each priority +specification, if any. To simplify discovery of supported priority +values, each priority register must support any combination of values +in the bits that are variable within the register, i.e., if there are +two variable bits in the register, all four combinations of values in +those bits must operate as valid priority levels. + +\begin{commentary} + In the degenerate case, all priorities can be hardwired to the value + 1, in which case input priorities are effectively determined by + interrupt ID. + + The supported priority values can be determined as follows: 1) write + all zeros to the priority register then 2) read back the value. Any + set bits are hardwired to 1. Next, 3) write all ones to the + register, and 4) read back the value. Any clear bits are hardwired + to 0. Any set bits that were not found to be hardwired in step 2 are + variable. The supported priority levels are the set of values + obtained by substituting all combinations of ones and zeros in the + variable bits within the priority field. +\end{commentary} + +\section{Interrupt Enables} + +Each target has a vector of interrupt enable (IE) bits, one per +interrupt source. The target will not receive interrupts from sources +that are disabled. The IE bits for a single target should be packed +together as a bit vector in platform-specific memory-mapped control +registers to support rapid context switching of the IE bits for a +target. 
IE bits are \warl\ fields that can be hardwired to either 0 +or 1. + +\begin{commentary} +A large number of potential IE bits might be hardwired to zero in +cases where some interrupt sources can only be routed to +a subset of targets. + +A larger number of bits might be wired to 1 for an embedded device +with fixed interrupt routing. Interrupt priorities, thresholds, and +hart-internal interrupt masking provide considerable flexibility in +ignoring external interrupts even if a global interrupt source is +always enabled. +\end{commentary} + +\section{Interrupt Priority Thresholds} + +Each interrupt target has an associated priority threshold, held in a +platform-specific memory-mapped register. Only active interrupts that +have a priority strictly greater than the threshold will cause an +interrupt notification to be sent to the target. Different interrupt +targets need not support the same set of priority threshold values. +Interrupt target threshold registers should be \warl\ fields to allow +software to determine the supported thresholds. A threshold register +should always be able to hold the value zero, in which case, no +interrupts are masked. If implemented, the threshold register will +usually also be able to hold the maximum priority level, in which case +all interrupts are masked. + +\begin{commentary} +A simple valid implementation is to hardwire the threshold to zero, in +which case it has no effect, and the individual enable bits will have +to be saved and restored to attain the same effect. While the +function of the threshold can be achieved by changing the +interrupt-enable bits, manipulating a single threshold value avoids +the target having to consider the individual priority levels of each +interrupt source, and saving and restoring all the interrupt enables. +Changing the threshold quickly might be especially important for +systems that move frequently between power states.
+\end{commentary} + +\section{Interrupt Notifications} + +Each interrupt target has an {\em external interrupt pending} (EIP) +bit in the PLIC core that indicates that the corresponding target has +a pending interrupt waiting for service. The value in EIP can change +as a result of changes to state in the PLIC core, brought on by +interrupt sources, interrupt targets, or other agents manipulating +register values in the PLIC. The value in EIP is communicated to the +destination target as an interrupt notification. If the target is a +RISC-V hart context, the interrupt notifications arrive on the {\tt + meip}/{\tt heip}/{\tt seip}/{\tt ueip} bits depending on the +privilege level of the hart context. + +\begin{commentary} +In simple systems, the interrupt notifications will be simple wires +connected to the processor implementing a hart. In more complex +platforms, the notifications might be routed as messages across a +system interconnect. +\end{commentary} + +The PLIC hardware only supports multicasting of interrupts, such that +all enabled targets will receive interrupt notifications for a given +active interrupt. + +\begin{commentary} +Multicasting provides rapid response since the fastest responder +claims the interrupt, but can be wasteful in high-interrupt-rate +scenarios if multiple harts take a trap for an interrupt that only one +can successfully claim. Software can modulate the PLIC IE bits as +part of each interrupt handler to provide alternate policies, such as +interrupt affinity or round-robin unicasting. +\end{commentary} + +Depending on the platform architecture and the method used to +transport interrupt notifications, these might take some time to be +received at the targets. The PLIC is guaranteed to eventually deliver +all state changes in EIP to all targets, provided there is no +intervening activity in the PLIC core. 
+ +\begin{commentary} +The value in an interrupt notification is only guaranteed to hold an +EIP value that was valid at some point in the past. In particular, a +second target can respond and claim an interrupt while a notification +to the first target is still in flight, such that when the first +target tries to claim the interrupt it finds it has no active +interrupts in the PLIC core. +\end{commentary} + +\section{Interrupt Claims} + +Sometime after a target receives an interrupt notification, it might +decide to service the interrupt. The target sends an {\em interrupt + claim} message to the PLIC core, which will usually be implemented +as a non-idempotent memory-mapped I/O control register read. On +receiving a claim message, the PLIC core will atomically determine the +ID of the highest-priority pending interrupt for the target and then +clear down the corresponding source's IP bit. The PLIC core will then +return the ID to the target. The PLIC core will return an ID of zero, +if there were no pending interrupts for the target when the claim was +serviced. + +After the highest-priority pending interrupt is claimed by a target +and the corresponding IP bit is cleared, other lower-priority pending +interrupts might then become visible to the target, and so the PLIC +EIP bit might not be cleared after a claim. The interrupt handler +can check the local {\tt meip}/{\tt heip}/{\tt seip}/{\tt ueip} bits +before exiting the handler, to allow more efficient service of other +interrupts without first restoring the interrupted context and taking +another interrupt trap. + +It is always legal for a hart to perform a claim even if the EIP is +not set. 
In particular, a hart could set the threshold value to maximum +to disable interrupt notifications and instead poll for active +interrupts using periodic claim requests, though a simpler approach to +implement polling would be to clear the external interrupt enable in +the corresponding {\em x}{\tt ie} register for privilege mode {\em x}. + +\section{Interrupt Completion} + +After a handler has completed service of an interrupt, the associated +gateway must be sent an interrupt completion message, usually as a +write to a non-idempotent memory-mapped I/O control register. The +gateway will only forward additional interrupts to the PLIC core after +receiving the completion message. + +\section{Interrupt Flow} + +Figure~\ref{fig:intflow} shows the messages flowing between agents +when handling interrupts via the PLIC. + +\begin{figure}[hb!] +\centering +\includegraphics[width=4.0in]{figs/PLIC-interrupt-flow.pdf} +\caption{Flow of interrupt processing via the PLIC.} +\label{fig:intflow} +\end{figure} + +The gateway will only forward a single interrupt request at a time to +the PLIC, and not forward subsequent interrupt requests until an +interrupt completion is received. The PLIC will set the IP bit once +it accepts an interrupt request from the gateway, and sometime later +forward an interrupt notification to the target. The target might +take a while to respond to a new interrupt arriving, but will then +send an interrupt claim request to the PLIC core to obtain the +interrupt ID. The PLIC core will atomically return the ID and clear +the corresponding IP bit, after which no other target can claim the +same interrupt request. Once the handler has processed the interrupt, +it sends an interrupt completion message to the gateway to allow a new +interrupt request. 
+ +\section{PLIC Core Specification} + +The operation of the PLIC core can be specified as a non-deterministic +finite-state machine with input and output message queues, with the +following atomic actions: + +\begin{itemize} + +\item {\bf Write Register: } A message containing a register write + request is dequeued. One of the internal registers is written, + where an internal register can be a priority, an interrupt-enable + (IE), or a threshold. + +\item {\bf Accept Request: } If the IP bit corresponding to the + interrupt source is clear, a message containing an interrupt request + from a gateway is dequeued and the IP bit is set. + +\item {\bf Process Claim: } An interrupt claim message is dequeued. A + claim-response message is enqueued to the requester with the ID of + the highest-priority active interrupt for that target, and the IP + bit corresponding to this interrupt source is cleared. + +\end{itemize} + +The value in the EIP bit is determined as a combinational function of +the PLIC Core state. Interrupt notifications are sent via an +autonomous process that ensures the EIP value is eventually reflected +at the target. + +Note that the operation of the interrupt gateways is decoupled from +the PLIC core. A gateway can handle parsing of interrupt signals and +processing interrupt completion messages concurrently with other +operations in the PLIC core. + +\begin{commentary} +Figure~\ref{fig:plic} is a high-level conceptual view of the PLIC +design. The PLIC core can be implemented in many ways provided its +behavior can always be understood as following from some sequential +ordering of these atomic actions. In particular, the PLIC might +process multiple actions in a single clock cycle, or might process +each action over many clock cycles. 
+\end{commentary} + +\section{Controlling Access to the PLIC} + +In the expected use case, only machine mode accesses the source +priority, source pending, and target interrupt enables to configure +the interrupt subsystem. Lower-privilege modes access these features +via ABI, SBI, or HBI calls. The interrupt enables act as a protection +mechanism where a target can only signal completion to an interrupt +gateway that is currently enabled for that target. + +Interrupt handlers that run with lower than machine-mode privilege +need only be able to perform a claim read and a completion write, and +to set their target threshold value. The memory map for these +registers should allow machine mode to protect different targets from +each other's accesses, using either physical memory protection or +virtual memory page protections. + diff --git a/src/preamble.tex b/src/preamble.tex new file mode 100644 index 0000000..45ff257 --- /dev/null +++ b/src/preamble.tex @@ -0,0 +1,121 @@ +% Package includes + +\usepackage{graphicx} +\usepackage{geometry} +\usepackage{array} +\usepackage{colortbl} +\usepackage{hyperref} +\usepackage{placeins} +\usepackage{bbding} +\usepackage{longtable} +\usepackage{multirow} +\usepackage{float} + +% Setup margins + +\setlength{\topmargin}{-0.5in} +\setlength{\textheight}{9in} +\setlength{\oddsidemargin}{0in} +\setlength{\evensidemargin}{0in} +\setlength{\textwidth}{6.5in} + +% Useful macros + +\newcommand{\note}[1]{{\bf [ NOTE: #1 ]}} +\newcommand{\fixme}[1]{{\bf [ FIXME: #1 ]}} +\newcommand{\todo}[1]{\marginpar{\footnotesize #1}} + +\newcommand{\wunits}[2]{\mbox{#1\,#2}} +\newcommand{\um}{\mbox{$\mu$m}} +\newcommand{\xum}[1]{\wunits{#1}{\um}} +\newcommand{\by}[2]{\mbox{#1$\times$#2}} +\newcommand{\byby}[3]{\mbox{#1$\times$#2$\times$#3}} + +\newlength\savedwidth +\newcommand\whline[1]{% + \noalign{% + \global\savedwidth\arrayrulewidth\global\arrayrulewidth 1.5pt% + }% + \cline{#1}% + \noalign{\vskip\arrayrulewidth}% + 
\noalign{\global\arrayrulewidth\savedwidth}% +} + +% Custom list environments + +\newenvironment{tightlist} +{\begin{itemize} + \setlength{\parsep}{0pt} + \setlength{\itemsep}{-2pt}} +{\end{itemize}} + +\newenvironment{titledtightlist}[1] +{\noindent + ~~\textbf{#1} + \begin{itemize} + \setlength{\parsep}{0pt} + \setlength{\itemsep}{-2pt}} +{\end{itemize}} + +\newenvironment{commentary} +{ \vspace{-0.2in} + \begin{quotation} + \noindent + \small \em + \rule{\linewidth}{1pt}\\ +} +{ + \end{quotation} + \vspace{-0.2in} +} + +% Other commands and parameters + +\pagestyle{myheadings} +\setlength{\parindent}{0in} +\setlength{\parskip}{10pt} +\sloppy + +% Commands for register format figures. + +% New column types to use in tabular environment for instruction formats. +% Allocate 0.18in per bit. +\newcolumntype{I}{>{\centering\arraybackslash}p{0.18in}} +% Two-bit centered column. +\newcolumntype{W}{>{\centering\arraybackslash}p{0.36in}} +% Three-bit centered column. +\newcolumntype{F}{>{\centering\arraybackslash}p{0.54in}} +% Four-bit centered column. +\newcolumntype{Y}{>{\centering\arraybackslash}p{0.72in}} +% Five-bit centered column. +\newcolumntype{R}{>{\centering\arraybackslash}p{0.9in}} +% Six-bit centered column. +\newcolumntype{S}{>{\centering\arraybackslash}p{1.08in}} +% Seven-bit centered column. +\newcolumntype{O}{>{\centering\arraybackslash}p{1.26in}} +% Eight-bit centered column. +\newcolumntype{E}{>{\centering\arraybackslash}p{1.44in}} +% Ten-bit centered column. +\newcolumntype{T}{>{\centering\arraybackslash}p{1.8in}} +% Twelve-bit centered column. +\newcolumntype{M}{>{\centering\arraybackslash}p{2.2in}} +% Sixteen-bit centered column. +\newcolumntype{K}{>{\centering\arraybackslash}p{2.88in}} +% Twenty-bit centered column. +\newcolumntype{U}{>{\centering\arraybackslash}p{3.6in}} +% Twenty-bit centered column. +\newcolumntype{L}{>{\centering\arraybackslash}p{3.6in}} +% Twenty-five-bit centered column. 
+\newcolumntype{J}{>{\centering\arraybackslash}p{4.5in}} + +\newcommand{\instbit}[1]{\mbox{\scriptsize #1}} +\newcommand{\instbitrange}[2]{~\instbit{#1} \hfill \instbit{#2}~} +\newcommand{\reglabel}[1]{\hfill {\tt #1}\hfill\ } + +\newcommand{\wiri}{\textbf{WIRI}} +\newcommand{\wpri}{\textbf{WPRI}} +\newcommand{\wlrl}{\textbf{WLRL}} +\newcommand{\warl}{\textbf{WARL}} + + + diff --git a/src/preface.tex b/src/preface.tex new file mode 100644 index 0000000..ce3e8df --- /dev/null +++ b/src/preface.tex @@ -0,0 +1,154 @@ +\chapter{Preface} + +This is version \specrev\ of the document describing the RISC-V +user-level architecture. The document contains the following +versions of the RISC-V ISA modules: +\begin{table}[hbt] + \centering + \begin{tabular}{|c|l|c|} + \hline + Base & Version & Frozen? \\ + \hline + RV32I & 2.0 & Y \\ + RV32E & 1.9 & N \\ + RV64I & 2.0 & Y \\ + RV128I & 1.7 & N \\ + \hline + Extension & Version & Frozen? \\ + \hline + M & 2.0 & Y \\ + A & 2.0 & Y \\ + F & 2.0 & Y \\ + D & 2.0 & Y \\ + Q & 2.0 & Y \\ + L & 0.0 & N \\ + C & 1.9 & N \\ + V & 0.1 & N \\ + B & 0.0 & N \\ + T & 0.0 & N \\ + P & 0.1 & N \\ + \hline + \end{tabular} +\end{table} + +To date, no parts of the standard have been officially ratified by the +RISC-V Foundation, but the components labeled ``frozen'' above are not +expected to change during the ratification process. + +The major changes in this version of the document include: +\begin{itemize} +\parskip 0pt +\itemsep 1pt +\item Improvements to the description and commentary. +\item Clarified behavior of FSGNJ.D instruction on single-precision inputs. +\item Corrected the description of the FNMADD.{\em fmt} and FNMSUB.{\em fmt} + instructions, which had suggested the incorrect sign of a zero result. +\item A draft proposal of the V vector instruction set extension. +\item An expanded pseudoinstruction listing. +\item A new table of control and status register (CSR) mappings. 
+\item Clarification of constraints on load-reserved/store-conditional sequences. +\end{itemize} + +\section*{Preface to Document Version 2.1} + +This is version 2.1 of the document describing the RISC-V user-level +architecture. Note the frozen user-level ISA base and extensions +IMAFDQ version 2.0 have not changed from the previous version of this +document~\cite{riscvtr2}, but some specification holes have been fixed +and the documentation has been improved. Some changes have been made +to the software conventions. +\begin{itemize} +\parskip 0pt +\itemsep 1pt +\item Numerous additions and improvements to the commentary sections. +\item Separate version numbers for each chapter. +\item Modification to long instruction encodings $>$64 bits to avoid + moving the {\em rd} specifier in very long instruction formats. +\item CSR instructions are now described in the base integer format + where the counter registers are introduced, as opposed to only being + introduced later in the floating-point section (and the companion + privileged architecture manual). +\item The SCALL and SBREAK instructions have been renamed to ECALL and + EBREAK, respectively. Their encoding and functionality are unchanged. +\item Clarification of floating-point NaN handling, and a new canonical NaN + value. +\item Clarification of values returned by floating-point to integer + conversions that overflow. +\item Clarification of LR/SC allowed successes and required failures, + including use of compressed instructions in the sequence. +\item A new RV32E base ISA proposal for reduced integer register + counts, supports MAC extensions. +\item A revised calling convention. +\item Relaxed stack alignment for soft-float calling convention, and + description of the RV32E calling convention. +\item A revised proposal for the C compressed extension, version 1.9. 
+\end{itemize} + +\section*{Preface to Version 2.0} + +This is the second release of the user ISA specification, and we +intend the specification of the base user ISA plus general extensions +(i.e., IMAFD) to remain fixed for future development. The following +changes have been made since Version 1.0~\cite{riscvtr} of this ISA +specification. + +\vspace{-0.1in} +\begin{itemize} +\parskip 0pt +\itemsep 1pt +\item The ISA has been divided into an integer base with several + standard extensions. +\item The instruction formats have been rearranged to make immediate + encoding more efficient. +\item The base ISA has been defined to have a little-endian memory system, with + big-endian or bi-endian as non-standard variants. +\item Load-Reserved/Store-Conditional (LR/SC) instructions have been added in + the atomic instruction extension. +\item AMOs and LR/SC can support the release consistency model. +\item The FENCE instruction provides finer-grain memory and I/O + orderings. +\item An AMO for fetch-and-XOR (AMOXOR) has been added, and the + encoding for AMOSWAP has been changed to make room. +\item The AUIPC instruction, which adds a 20-bit upper immediate to + the PC, replaces the RDNPC instruction, which only read the current + PC value. This results in significant savings for position-independent + code. +\item The JAL instruction has now moved to the U-Type format with an + explicit destination register, and the J instruction has been + dropped being replaced by JAL with {\em rd}={\tt x0}. This removes + the only instruction with an implicit destination register and + removes the J-Type instruction format from the base ISA. There is + an accompanying reduction in JAL reach, but a significant reduction + in base ISA complexity. +\item The static hints on the JALR instruction have been dropped. The + hints are redundant with the {\em rd} and {\em rs1} register + specifiers for code compliant with the standard calling convention. 
+\item The JALR instruction now clears the lowest bit of the calculated + target address, to simplify hardware and to allow auxiliary information + to be stored in function pointers. +\item The MFTX.S and MFTX.D instructions have been renamed to FMV.X.S and +FMV.X.D, respectively. Similarly, MXTF.S and MXTF.D instructions have been +renamed to FMV.S.X and FMV.D.X, respectively. +\item The MFFSR and MTFSR instructions have been renamed to FRCSR and FSCSR, +respectively. FRRM, FSRM, FRFLAGS, and FSFLAGS instructions have been added +to individually access the rounding mode and exception flags subfields of +the {\tt fcsr}. +\item The FMV.X.S and FMV.X.D instructions now source their operands +from {\em rs1}, instead of {\em rs2}. This change simplifies datapath +design. +\item FCLASS.S and FCLASS.D floating-point classify instructions have been +added. +\item A simpler NaN generation and propagation scheme has been + adopted. +\item For RV32I, the system performance counters have been extended to + 64-bits wide, with separate read access to the upper and lower 32 bits. +\item Canonical NOP and MV encodings have been defined. +\item Standard instruction-length encodings have been defined for 48-bit, + 64-bit, and $>$64-bit instructions. +\item Description of a 128-bit address space variant, RV128, has been added. +\item Major opcodes in the 32-bit base instruction format have been + allocated for user-defined custom extensions. +\item A typographical error that suggested that stores source their + data from {\em rd} has been corrected to refer to {\em rs2}. +\end{itemize} +\vspace{-0.1in} diff --git a/src/priv-csrs.tex b/src/priv-csrs.tex new file mode 100644 index 0000000..b9993d9 --- /dev/null +++ b/src/priv-csrs.tex @@ -0,0 +1,421 @@ +\chapter{Control and Status Registers (CSRs)} + +The SYSTEM major opcode is used to encode all privileged instructions +in the RISC-V ISA. 
These can be divided into two main classes: those +that atomically read-modify-write control and status registers (CSRs), +and all other privileged instructions. In addition to the user-level +state described in Volume I of this manual, an implementation may +contain additional CSRs, accessible by some subset of the privilege +levels using the CSR instructions described in the user-level manual. +In this chapter, we map out the CSR address space. The following +chapters describe the function of each of the CSRs according to +privilege level, as well as the other privileged instructions which +are generally closely associated with a particular privilege level. +Note that although CSRs and instructions are associated with one +privilege level, they are also accessible at all higher privilege +levels. + +\section{CSR Address Mapping Conventions} + +The standard RISC-V ISA sets aside a 12-bit encoding space (csr[11:0]) +for up to 4,096 CSRs. By convention, the upper 4 bits of the CSR +address (csr[11:8]) are used to encode the read and write +accessibility of the CSRs according to privilege level as shown in +Table~\ref{csrrwpriv}. The top two bits (csr[11:10]) indicate whether +the register is read/write ({\tt 00}, {\tt 01}, or {\tt 10}) or +read-only ({\tt 11}). The next two bits (csr[9:8]) encode the lowest +privilege level that can access the CSR. + +\begin{commentary} +The CSR address convention uses the upper bits of the CSR address to +encode default access privileges. This simplifies error checking in +the hardware and provides a larger CSR space, but does constrain the +mapping of CSRs into the address space. + +Implementations might allow a more-privileged level to trap otherwise +permitted CSR accesses by a less-privileged level to allow these +accesses to be intercepted. This change should be transparent to the +less-privileged software. +\end{commentary} + +\vspace{0.2in} +\begin{table*}[h!] 
+\begin{center} +\begin{tabular}{|c|c|c|c|l|} +\hline +\multicolumn{3}{|c|}{CSR Address} & Hex & \multicolumn{1}{c|}{Use and Accessibility}\\ \cline{1-3} +[11:10] & [9:8] & [7:6] & & \\ +\hline +\multicolumn{5}{|c|}{User CSRs} \\ +\hline +\tt 00 &\tt 00 &\tt XX & \tt 0x000-0x0FF & Standard read/write \\ +\tt 01 &\tt 00 &\tt XX & \tt 0x400-0x4FF & Standard read/write \\ +\tt 10 &\tt 00 &\tt XX & \tt 0x800-0x8FF & Non-standard read/write \\ +\tt 11 &\tt 00 &\tt 00-10 & \tt 0xC00-0xCBF & Standard read-only \\ +\tt 11 &\tt 00 &\tt 11 & \tt 0xCC0-0xCFF & Non-standard read-only \\ +\hline +\multicolumn{5}{|c|}{Supervisor CSRs} \\ +\hline +\tt 00 &\tt 01 &\tt XX & \tt 0x100-0x1FF & Standard read/write \\ +\tt 01 &\tt 01 &\tt 00-10 & \tt 0x500-0x5BF & Standard read/write \\ +\tt 01 &\tt 01 &\tt 11 & \tt 0x5C0-0x5FF & Non-standard read/write \\ +\tt 10 &\tt 01 &\tt 00-10 & \tt 0x900-0x9BF & Standard read/write shadows \\ +\tt 10 &\tt 01 &\tt 11 & \tt 0x9C0-0x9FF & Non-standard read/write shadows \\ +\tt 11 &\tt 01 &\tt 00-10 & \tt 0xD00-0xDBF & Standard read-only \\ +\tt 11 &\tt 01 &\tt 11 & \tt 0xDC0-0xDFF & Non-standard read-only \\ +\hline +\multicolumn{5}{|c|}{Hypervisor CSRs} \\ +\hline +\tt 00 &\tt 10 &\tt XX & \tt 0x200-0x2FF & Standard read/write \\ +\tt 01 &\tt 10 &\tt 00-10 & \tt 0x600-0x6BF & Standard read/write \\ +\tt 01 &\tt 10 &\tt 11 & \tt 0x6C0-0x6FF & Non-standard read/write \\ +\tt 10 &\tt 10 &\tt 00-10 & \tt 0xA00-0xABF & Standard read/write shadows \\ +\tt 10 &\tt 10 &\tt 11 & \tt 0xAC0-0xAFF & Non-standard read/write shadows \\ +\tt 11 &\tt 10 &\tt 00-10 & \tt 0xE00-0xEBF & Standard read-only \\ +\tt 11 &\tt 10 &\tt 11 & \tt 0xEC0-0xEFF & Non-standard read-only \\ +\hline +\multicolumn{5}{|c|}{Machine CSRs} \\ +\hline +\tt 00 &\tt 11 &\tt XX & \tt 0x300-0x3FF & Standard read/write \\ +\tt 01 &\tt 11 &\tt 00-10 & \tt 0x700-0x79F & Standard read/write \\ +\tt 01 &\tt 11 &\tt 10 & \tt 0x7A0-0x7AF & Standard read/write debug CSRs \\ +\tt 01 &\tt 11 &\tt 10 
& \tt 0x7B0-0x7BF & Debug-mode-only CSRs \\ +\tt 01 &\tt 11 &\tt 11 & \tt 0x7C0-0x7FF & Non-standard read/write \\ +\tt 10 &\tt 11 &\tt 00-10 & \tt 0xB00-0xBBF & Standard read/write shadows \\ +\tt 10 &\tt 11 &\tt 11 & \tt 0xBC0-0xBFF & Non-standard read/write shadows \\ +\tt 11 &\tt 11 &\tt 00-10 & \tt 0xF00-0xFBF & Standard read-only \\ +\tt 11 &\tt 11 &\tt 11 & \tt 0xFC0-0xFFF & Non-standard read-only \\ +\hline +\end{tabular} +\end{center} +\caption{Allocation of RISC-V CSR address ranges.} +\label{csrrwpriv} +\end{table*} + +Attempts to access a non-existent CSR raise an illegal instruction +exception. Attempts to access a CSR without appropriate privilege +level or to write a read-only register also raise illegal instruction +exceptions. A read/write register might also contain some bits that +are read-only, in which case writes to the read-only bits are ignored. + +Table~\ref{csrrwpriv} also indicates the convention to allocate CSR +addresses between standard and non-standard uses. The CSR addresses +reserved for non-standard uses will not be redefined by future +standard extensions. The shadow addresses are reserved to provide a +read-write address via which a higher privilege level can modify a +register that is read-only at a lower privilege level. Note that if +one privilege level has already allocated a read/write shadow +address, then any higher privilege level can use the same CSR address +for read/write access to the same register. + +\begin{commentary} +Effective virtualization requires that as many instructions run +natively as possible inside a virtualized environment, while any +privileged accesses trap to the virtual machine +monitor~\cite{goldbergvm}. CSRs that are read-only at some lower +privilege level are shadowed into separate CSR addresses if they are +made read-write at a higher privilege level. This avoids trapping +permitted lower-privilege accesses while still causing traps on +illegal accesses. 
+\end{commentary} + +Machine-mode standard read-write CSRs {\tt 0x7A0}--{\tt 0x7BF} are +reserved for use by the debug system. Implementations should raise +illegal instruction exceptions on machine-mode access to these registers. + +\section{CSR Listing} + +Tables~\ref{ucsrnames}--\ref{mcsrnames} list the CSRs that have +currently been allocated CSR addresses. The timers, counters, and +floating-point CSRs are the only standard user-level CSRs currently +defined. The other registers are used by privileged code, as described +in the following chapters. Note that not all registers are required +on all implementations. + +\begin{table}[htb!] +\begin{center} +\begin{tabular}{|l|l|l|l|} +\hline +Number & Privilege & Name & Description \\ +\hline +\multicolumn{4}{|c|}{User Trap Setup} \\ +\hline +\tt 0x000 & URW &\tt ustatus & User status register. \\ +\tt 0x004 & URW &\tt uie & User interrupt-enable register. \\ +\tt 0x005 & URW &\tt utvec & User trap handler base address. \\ +\hline +\multicolumn{4}{|c|}{User Trap Handling} \\ +\hline +\tt 0x040 & URW &\tt uscratch & Scratch register for user trap handlers. \\ +\tt 0x041 & URW &\tt uepc & User exception program counter. \\ +\tt 0x042 & URW &\tt ucause & User trap cause. \\ +\tt 0x043 & URW &\tt ubadaddr & User bad address. \\ +\tt 0x044 & URW &\tt uip & User interrupt pending. \\ +\hline +\multicolumn{4}{|c|}{User Floating-Point CSRs} \\ +\hline +\tt 0x001 & URW &\tt fflags & Floating-Point Accrued Exceptions. \\ +\tt 0x002 & URW &\tt frm & Floating-Point Dynamic Rounding Mode. \\ +\tt 0x003 & URW &\tt fcsr & Floating-Point Control and Status +Register ({\tt frm} + {\tt fflags}). \\ +\hline +\multicolumn{4}{|c|}{User Counter/Timers} \\ +\hline +\tt 0xC00 & URO &\tt cycle & Cycle counter for RDCYCLE instruction. \\ +\tt 0xC01 & URO &\tt time & Timer for RDTIME instruction. \\ +\tt 0xC02 & URO &\tt instret & Instructions-retired counter for RDINSTRET instruction. 
\\ +\tt 0xC03 & URO &\tt hpmcounter3 & Performance-monitoring counter. \\ +\tt 0xC04 & URO &\tt hpmcounter4 & Performance-monitoring counter. \\ +& & \multicolumn{1}{c|}{\vdots} & \ \\ +\tt 0xC1F & URO &\tt hpmcounter31 & Performance-monitoring counter. \\ +\tt 0xC80 & URO &\tt cycleh & Upper 32 bits of {\tt cycle}, RV32I only. \\ +\tt 0xC81 & URO &\tt timeh & Upper 32 bits of {\tt time}, RV32I only. \\ +\tt 0xC82 & URO &\tt instreth & Upper 32 bits of {\tt instret}, RV32I only. \\ +\tt 0xC83 & URO &\tt hpmcounter3h & Upper 32 bits of {\tt hpmcounter3}, RV32I only. \\ +\tt 0xC84 & URO &\tt hpmcounter4h & Upper 32 bits of {\tt hpmcounter4}, RV32I only. \\ +& & \multicolumn{1}{c|}{\vdots} & \ \\ +\tt 0xC9F & URO &\tt hpmcounter31h & Upper 32 bits of {\tt hpmcounter31}, RV32I only. \\ +\hline +\end{tabular} +\end{center} +\caption{Currently allocated RISC-V user-level CSR addresses.} +\label{ucsrnames} +\end{table} + +\begin{table}[htb!] +\begin{center} +\begin{tabular}{|l|l|l|l|} +\hline +Number & Privilege & Name & Description \\ +\hline +\multicolumn{4}{|c|}{Supervisor Trap Setup} \\ +\hline +\tt 0x100 & SRW &\tt sstatus & Supervisor status register. \\ +\tt 0x102 & SRW &\tt sedeleg & Supervisor exception delegation register. \\ +\tt 0x103 & SRW &\tt sideleg & Supervisor interrupt delegation register. \\ +\tt 0x104 & SRW &\tt sie & Supervisor interrupt-enable register. \\ +\tt 0x105 & SRW &\tt stvec & Supervisor trap handler base address. \\ +\hline +\multicolumn{4}{|c|}{Supervisor Trap Handling} \\ +\hline +\tt 0x140 & SRW &\tt sscratch & Scratch register for supervisor trap handlers. \\ +\tt 0x141 & SRW &\tt sepc & Supervisor exception program counter. \\ +\tt 0x142 & SRW &\tt scause & Supervisor trap cause. \\ +\tt 0x143 & SRW &\tt sbadaddr & Supervisor bad address. \\ +\tt 0x144 & SRW &\tt sip & Supervisor interrupt pending. \\ +\hline +\multicolumn{4}{|c|}{Supervisor Protection and Translation} \\ +\hline +\tt 0x180 & SRW &\tt sptbr & Page-table base register. 
\\ +\hline +\end{tabular} +\end{center} +\caption{Currently allocated RISC-V supervisor-level CSR addresses.} +\label{scsrnames} +\end{table} + +\begin{table}[htb!] +\begin{center} +\begin{tabular}{|l|l|l|l|} +\hline +Number & Privilege & Name & Description \\ +\hline +\multicolumn{4}{|c|}{Hypervisor Trap Setup} \\ +\hline +\tt 0x200 & HRW &\tt hstatus & Hypervisor status register. \\ +\tt 0x202 & HRW &\tt hedeleg & Hypervisor exception delegation register. \\ +\tt 0x203 & HRW &\tt hideleg & Hypervisor interrupt delegation register. \\ +\tt 0x204 & HRW &\tt hie & Hypervisor interrupt-enable register. \\ +\tt 0x205 & HRW &\tt htvec & Hypervisor trap handler base address. \\ +\hline +\multicolumn{4}{|c|}{Hypervisor Trap Handling} \\ +\hline +\tt 0x240 & HRW &\tt hscratch & Scratch register for hypervisor trap handlers. \\ +\tt 0x241 & HRW &\tt hepc & Hypervisor exception program counter. \\ +\tt 0x242 & HRW &\tt hcause & Hypervisor trap cause. \\ +\tt 0x243 & HRW &\tt hbadaddr & Hypervisor bad address. \\ +\tt 0x244 & HRW &\tt hip & Hypervisor interrupt pending. \\ +\hline +\multicolumn{4}{|c|}{Hypervisor Protection and Translation} \\ +\hline +\tt 0x28X & TBD & TBD & TBD. \\ +\hline +\end{tabular} +\end{center} +\caption{Currently allocated RISC-V hypervisor-level CSR addresses.} +\label{hcsrnames} +\end{table} + + +\begin{table}[htb!] +\begin{center} +\begin{tabular}{|l|l|l|l|} +\hline +Number & Privilege & Name & Description \\ +\hline +\multicolumn{4}{|c|}{Machine Information Registers} \\ +\hline +\tt 0xF11 & MRO &\tt mvendorid & Vendor ID. \\ +\tt 0xF12 & MRO &\tt marchid & Architecture ID. \\ +\tt 0xF13 & MRO &\tt mimpid & Implementation ID. \\ +\tt 0xF14 & MRO &\tt mhartid & Hardware thread ID. \\ +\hline +\multicolumn{4}{|c|}{Machine Trap Setup} \\ +\hline +\tt 0x300 & MRW &\tt mstatus & Machine status register. \\ +\tt 0x301 & MRW &\tt misa & ISA and extensions \\ +\tt 0x302 & MRW &\tt medeleg & Machine exception delegation register. 
\\ +\tt 0x303 & MRW &\tt mideleg & Machine interrupt delegation register. \\ +\tt 0x304 & MRW &\tt mie & Machine interrupt-enable register. \\ +\tt 0x305 & MRW &\tt mtvec & Machine trap-handler base address. \\ +\hline +\multicolumn{4}{|c|}{Machine Trap Handling} \\ +\hline +\tt 0x340 & MRW &\tt mscratch & Scratch register for machine trap handlers. \\ +\tt 0x341 & MRW &\tt mepc & Machine exception program counter. \\ +\tt 0x342 & MRW &\tt mcause & Machine trap cause. \\ +\tt 0x343 & MRW &\tt mbadaddr & Machine bad address. \\ +\tt 0x344 & MRW &\tt mip & Machine interrupt pending. \\ +\hline +\multicolumn{4}{|c|}{Machine Protection and Translation} \\ +\hline +\tt 0x380 & MRW &\tt mbase & Base register. \\ +\tt 0x381 & MRW &\tt mbound & Bound register. \\ +\tt 0x382 & MRW &\tt mibase & Instruction base register. \\ +\tt 0x383 & MRW &\tt mibound & Instruction bound register. \\ +\tt 0x384 & MRW &\tt mdbase & Data base register. \\ +\tt 0x385 & MRW &\tt mdbound & Data bound register. \\ +%\tt 0x3A0 & MRW &\tt pmpselect & Physical memory protection register select. \\ +%\tt 0x3A1 & MRW &\tt pmpdata1 & Physical memory protection data register. \\ +%\tt 0x3A2 & MRW &\tt pmpdata2 & Physical memory protection data register. \\ +%\tt 0x3A3 & MRW &\tt pmpdata3 & Physical memory protection data register. \\ +\hline +\end{tabular} +\end{center} +\caption{Currently allocated RISC-V machine-level CSR addresses.} +\label{mcsrnames0} +\end{table} + +\begin{table}[htb!] +\begin{center} +\begin{tabular}{|l|l|l|l|} +\hline +Number & Privilege & Name & Description \\ +\hline +\multicolumn{4}{|c|}{Machine Counter/Timers} \\ +\hline +\tt 0xB00 & MRW &\tt mcycle & Machine cycle counter. \\ +\tt 0xB02 & MRW &\tt minstret & Machine instructions-retired counter. \\ +\tt 0xB03 & MRW &\tt mhpmcounter3 & Machine performance-monitoring counter. \\ +\tt 0xB04 & MRW &\tt mhpmcounter4 & Machine performance-monitoring counter. 
\\ +& & \multicolumn{1}{c|}{\vdots} & \ \\ +\tt 0xB1F & MRW &\tt mhpmcounter31 & Machine performance-monitoring counter. \\ +\tt 0xB80 & MRW &\tt mcycleh & Upper 32 bits of {\tt mcycle}, RV32I only. \\ +\tt 0xB82 & MRW &\tt minstreth & Upper 32 bits of {\tt minstret}, RV32I only. \\ +\tt 0xB83 & MRW &\tt mhpmcounter3h & Upper 32 bits of {\tt mhpmcounter3}, RV32I only. \\ +\tt 0xB84 & MRW &\tt mhpmcounter4h & Upper 32 bits of {\tt mhpmcounter4}, RV32I only. \\ +& & \multicolumn{1}{c|}{\vdots} & \ \\ +\tt 0xB9F & MRW &\tt mhpmcounter31h & Upper 32 bits of {\tt mhpmcounter31}, RV32I only. \\ +\hline +\multicolumn{4}{|c|}{Machine Counter Setup} \\ +\hline +\tt 0x320 & MRW &\tt mucounteren & User-mode counter enable. \\ +\tt 0x321 & MRW &\tt mscounteren & Supervisor-mode counter enable. \\ +\tt 0x322 & MRW &\tt mhcounteren & Hypervisor-mode counter enable. \\ +\tt 0x323 & MRW &\tt mhpmevent3 & Machine performance-monitoring event selector. \\ +\tt 0x324 & MRW &\tt mhpmevent4 & Machine performance-monitoring event selector. \\ +& & \multicolumn{1}{c|}{\vdots} & \ \\ +\tt 0x33F & MRW &\tt mhpmevent31 & Machine performance-monitoring event selector. \\ +\hline +\multicolumn{4}{|c|}{Debug/Trace Registers (shared with Debug Mode)} \\ +\hline +\tt 0x7A0 & MRW &\tt tselect & Debug/Trace trigger register select. \\ +\tt 0x7A1 & MRW &\tt tdata1 & First Debug/Trace trigger data register. \\ +\tt 0x7A2 & MRW &\tt tdata2 & Second Debug/Trace trigger data register. \\ +\tt 0x7A3 & MRW &\tt tdata3 & Third Debug/Trace trigger data register. \\ +\hline +\multicolumn{4}{|c|}{Debug Mode Registers } \\ +\hline +\tt 0x7B0 & DRW &\tt dcsr & Debug control and status register. \\ +\tt 0x7B1 & DRW &\tt dpc & Debug PC. \\ +\tt 0x7B2 & DRW &\tt dscratch & Debug scratch register. 
\\ +\hline +\end{tabular} +\end{center} +\caption{Currently allocated RISC-V machine-level CSR addresses.} +\label{mcsrnames} +\end{table} + +\clearpage + +\section{CSR Field Specifications} + + +The following definitions and abbreviations are used in specifying the +behavior of fields within the CSRs. + +\subsection*{Reserved Writes Ignored, Reads Ignore Values (WIRI)} + +Some read-only and read/write registers have read-only fields reserved +for future use. These reserved read-only fields should be ignored on +a read. Writes to these fields have no effect, unless the whole CSR +is read-only, in which case writes might raise an illegal instruction +exception. These fields are labeled \wiri\ in the register +descriptions. + +\subsection*{Reserved Writes Preserve Values, Reads Ignore Values (WPRI)} + +Some whole read/write fields are reserved for future use. Software +should ignore the values read from these fields, and should preserve +the values held in these fields when writing values to other fields of +the same register. These fields are labeled \wpri\ in the register +descriptions. + +\begin{commentary} +To simplify the software model, any backward-compatible future +definition of previously reserved fields within a CSR must cope with +the possibility that a non-atomic read/modify/write sequence is used +to update other fields in the CSR. Alternatively, the original CSR +definition must specify that subfields can only be updated atomically, +which may require a two-instruction clear bit/set bit sequence in +general that can be problematic if intermediate values are not legal. +\end{commentary} + +\subsection*{Write/Read Only Legal Values (WLRL)} + +Some read/write CSR fields specify behavior for only a subset of +possible bit encodings, with other bit encodings reserved. 
Software +should not write anything other than legal values to such a field, and +should not assume a read will return a legal value unless the last +write was of a legal value, or the register has not been written since +another operation (e.g., reset) set the register to a legal value. +These fields are labeled \wlrl\ in the register descriptions. + +\begin{commentary} +Hardware implementations need only implement enough state bits to +differentiate between the supported values, but must always return the +complete specified bit-encoding of any supported value when read. +\end{commentary} + +Implementations are permitted but not required to raise an illegal +instruction exception if an instruction attempts to write a +non-supported value to a CSR field. Hardware implementations can +return arbitrary bit patterns on the read of a CSR field when the last +write was of an illegal value, but the value returned should +deterministically depend on the previous written value. + +\subsection*{Write Any Values, Reads Legal Values (WARL)} + +Some read/write CSR fields are only defined for a subset of bit +encodings, but allow any value to be written while guaranteeing to +return a legal value whenever read. Assuming that writing the CSR has +no other side effects, the range of supported values can be determined +by attempting to write a desired setting then reading to see if the +value was retained. These fields are labeled \warl\ in the register +descriptions. + +Implementations will not raise an exception on writes of unsupported +values to a \warl\ field. Implementations must always +deterministically return the same legal value after a given illegal +value is written. + diff --git a/src/priv-history.tex b/src/priv-history.tex new file mode 100644 index 0000000..1187868 --- /dev/null +++ b/src/priv-history.tex @@ -0,0 +1,34 @@ +\chapter{History} + +\section*{Acknowledgments} + +Thanks to Allen J.
Baum, Ruslan Bukin, Christopher Celio, David +Chisnall, Palmer Dabbelt, Monte Dalrymple, Dennis Ferguson, Mike +Frysinger, Jonathan Neusch{\"a}fer, Rishiyur Nikhil, Stefan O'Rear, +Albert Ou, John Ousterhout, Colin Schmidt, Wesley Terpstra, Matt +Thomas, Tommy Thorn, Ray VanDeWalker, and Reinoud Zandijk for feedback +on the privileged specification. + +\section{Funding} + +Development of the RISC-V architecture and implementations has been +partially funded by the following sponsors. +\begin{itemize} +\item {\bf Par Lab:} Research supported by Microsoft (Award \#024263) + and Intel (Award \#024894) funding and by matching funding by + U.C. Discovery (Award \#DIG07-10227). Additional support came from + Par Lab affiliates Nokia, NVIDIA, Oracle, and Samsung. + +\item {\bf Project Isis:} DoE Award DE-SC0003624. + +\item {\bf ASPIRE Lab}: DARPA PERFECT program, Award HR0011-12-2-0016. + DARPA POEM program Award HR0011-11-C-0100. The Center for Future + Architectures Research (C-FAR), a STARnet center funded by the + Semiconductor Research Corporation. Additional support from ASPIRE + industrial sponsor, Intel, and ASPIRE affiliates, Google, Huawei, + Nokia, NVIDIA, Oracle, and Samsung. +\end{itemize} + +The content of this paper does not necessarily reflect the position or the +policy of the US government and no official endorsement should be +inferred. diff --git a/src/priv-insns.tex b/src/priv-insns.tex new file mode 100644 index 0000000..e1c388c --- /dev/null +++ b/src/priv-insns.tex @@ -0,0 +1,6 @@ +\chapter{RISC-V Privileged Instruction Set Listings} + +This chapter presents instruction set listings for all instructions +defined in the RISC-V Privileged Architecture. 
+ +\input{priv-instr-table} diff --git a/src/priv-instr-table.tex b/src/priv-instr-table.tex new file mode 100644 index 0000000..470d32b --- /dev/null +++ b/src/priv-instr-table.tex @@ -0,0 +1,113 @@ + +\newpage + +\begin{table}[p] +\begin{small} +\begin{center} +\begin{tabular}{p{0in}p{0.4in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.4in}p{0.6in}p{0.4in}p{0.6in}p{0.7in}l} +& & & & & & & & & & \\ + & +\multicolumn{1}{l}{\instbit{31}} & +\multicolumn{1}{r}{\instbit{27}} & +\instbit{26} & +\instbit{25} & +\multicolumn{1}{l}{\instbit{24}} & +\multicolumn{1}{r}{\instbit{20}} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & I-type \\ +\cline{2-11} + + +& +\multicolumn{10}{c}{} & \\ +& +\multicolumn{10}{c}{\bf Trap-Return Instructions} & \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{000000000010} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{1110011} & URET \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{000100000010} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{1110011} & SRET \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{001000000010} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{1110011} & HRET \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{001100000010} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{1110011} & MRET \\ +\cline{2-11} + + +& +\multicolumn{10}{c}{} & \\ +& +\multicolumn{10}{c}{\bf Interrupt-Management Instructions} & \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{000100000101} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{00000} & 
+\multicolumn{1}{c|}{1110011} & WFI \\ +\cline{2-11} + + +& +\multicolumn{10}{c}{} & \\ +& +\multicolumn{10}{c}{\bf Memory-Management Instructions} & \\ +\cline{2-11} + + +& +\multicolumn{6}{|c|}{000100000100} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{000} & +\multicolumn{1}{c|}{00000} & +\multicolumn{1}{c|}{1110011} & SFENCE.VM \\ +\cline{2-11} + + +\end{tabular} +\end{center} +\end{small} +\caption{RISC-V Privileged Instructions} +\label{instr-table} +\end{table} + diff --git a/src/priv-intro.tex b/src/priv-intro.tex new file mode 100644 index 0000000..ebed27c --- /dev/null +++ b/src/priv-intro.tex @@ -0,0 +1,261 @@ +\chapter{Introduction} + +{\em This is a draft of the privileged architecture description + document for RISC-V. Feedback welcome. Changes will occur before + the final release. } + +This document describes the RISC-V privileged architecture, which +covers all aspects of RISC-V systems beyond the user-level ISA, +including privileged instructions as well as additional functionality +required for running operating systems and attaching external devices. + +\begin{commentary} +Commentary on our design decisions is formatted as in this paragraph, +and can be skipped if the reader is only interested in the +specification itself. +\end{commentary} + +\begin{commentary} +We briefly note that the entire privileged-level design described in +this document could be replaced with an entirely different +privileged-level design without changing the user-level ISA, and +possibly without even changing the ABI. In particular, this +privileged specification was designed to run existing popular +operating systems, and so embodies the conventional level-based +protection model. Alternate privileged specifications could embody +other more flexible protection domain models. 
+\end{commentary} + +\section{RISC-V Hardware Platform Terminology} + +A RISC-V hardware platform can contain one or more RISC-V-compatible +processing cores together with other non-RISC-V-compatible cores, +fixed-function accelerators, various physical memory structures, I/O +devices, and an interconnect structure to allow the components to +communicate. + +A component is termed a {\em core} if it contains an independent +instruction fetch unit. A RISC-V-compatible core might support +multiple RISC-V-compatible hardware threads, or {\em harts}, through +multithreading. + +A RISC-V core might have additional specialized instruction set +extensions or an added {\em coprocessor}. We use the term {\em + coprocessor} to refer to a unit that is attached to a RISC-V core +and is mostly sequenced by a RISC-V instruction stream, but which +contains additional architectural state and instruction set +extensions, and possibly some limited autonomy relative to the +primary RISC-V instruction stream. + +We use the term {\em accelerator} to refer to either a +non-programmable fixed-function unit or a core that can operate +autonomously but is specialized for certain tasks. In RISC-V systems, +we expect many programmable accelerators will be RISC-V-based cores +with specialized instruction set extensions and/or customized +coprocessors. An important class of RISC-V accelerators are I/O +accelerators, which offload I/O processing tasks from the main +application cores. + +The system-level organization of a RISC-V hardware platform can range +from a single-core microcontroller to a many-thousand-node cluster of +shared-memory manycore server nodes. Even small systems-on-a-chip +might be structured as a hierarchy of multicomputers and/or +multiprocessors to modularize development effort or to provide secure +isolation between subsystems. 
+ +This document focuses on the privileged architecture visible to each +hart (hardware thread) running within a uniprocessor or a +shared-memory multiprocessor. + +\section{RISC-V Privileged Software Stack Terminology} + +This section describes the terminology we use to describe components +of the wide range of possible privileged software stacks for RISC-V. + +Figure~\ref{fig:privimps} shows some of the possible software stacks +that can be supported by the RISC-V architecture. The left-hand side +shows a simple system that supports only a single application running +on an application execution environment (AEE). The application is +coded to run with a particular application binary interface (ABI). +The ABI includes the supported user-level ISA plus a set of ABI calls to +interact with the AEE. The ABI hides details of the AEE from the +application to allow greater flexibility in implementing the AEE. The +same ABI could be implemented natively on multiple different host OSs, +or could be supported by a user-mode emulation environment running on +a machine with a different native ISA. + +\begin{figure}[th] +\centering +\includegraphics[width=\textwidth]{figs/privimps.pdf} +\caption{Different implementation stacks supporting various forms of + privileged execution.} +\label{fig:privimps} +\end{figure} + +\begin{commentary} +Our graphical convention represents abstract interfaces using black +boxes with white text, to separate them from concrete instances of +components implementing the interfaces. +\end{commentary} + +The middle configuration shows a conventional operating system (OS) +that can support multiprogrammed execution of multiple +applications. Each application communicates over an ABI with the OS, +which provides the AEE. Just as applications interface with an AEE +via an ABI, RISC-V operating systems interface with a supervisor +execution environment (SEE) via a supervisor binary interface (SBI). 
+An SBI comprises the user-level and supervisor-level ISA together with +a set of SBI function calls. Using a single SBI across all SEE +implementations allows a single OS binary image to run on any SEE. +The SEE can be a simple boot loader and BIOS-style IO system in a +low-end hardware platform, or a hypervisor-provided virtual machine in +a high-end server, or a thin translation layer over a host operating +system in an architecture simulation environment. + +\begin{commentary} +Most supervisor-level ISA definitions do not separate the SBI from the +execution environment and/or the hardware platform, complicating +virtualization and bring-up of new hardware platforms. +\end{commentary} + +The rightmost configuration shows a virtual machine monitor +configuration where multiple multiprogrammed OSs are supported by a +single hypervisor. Each OS communicates via an SBI with the +hypervisor, which provides the SEE. The hypervisor communicates with +the hypervisor execution environment (HEE) using a hypervisor binary +interface (HBI), to isolate the hypervisor from details of the +hardware platform. + +\begin{commentary} +The various ABI, SBI, and HBIs are still a work-in-progress, but we +anticipate the SBI and HBI to support devices via virtualized device +interfaces similar to virtio~\cite{virtio}, and to support device +discovery. In this manner, only one set of device drivers need be +written that can support any OS or hypervisor, and which can also be +shared with the boot environment. +\end{commentary} + +Hardware implementations of the RISC-V ISA will generally require +additional features beyond the privileged ISA to support the various +execution environments (AEE, SEE, or HEE). + +\section{Privilege Levels} + +At any time, a RISC-V hardware thread ({\em hart}) is running at some +privilege level encoded as a mode in one or more CSRs (control and +status registers). Four RISC-V privilege levels are currently defined +as shown in Table~\ref{privlevels}. 
+ +\begin{table*}[h!] +\begin{center} +\begin{tabular}{|c|c|c|c|} + \hline + Level & Encoding & Name & Abbreviation \\ \hline + 0 & \tt 00 & User/Application & U \\ + 1 & \tt 01 & Supervisor & S \\ + 2 & \tt 10 & Hypervisor & H \\ + 3 & \tt 11 & Machine & M \\ + \hline + \end{tabular} +\end{center} +\caption{RISC-V privilege levels.} +\label{privlevels} +\end{table*} + +Privilege levels are used to provide protection between different +components of the software stack, and attempts to perform operations +not permitted by the current privilege mode will cause an exception to +be raised. These exceptions will normally cause traps into an +underlying execution environment or the HAL. + +The machine level has the highest privileges and is the only mandatory +privilege level for a RISC-V hardware platform. Code run in +machine-mode (M-mode) is inherently trusted, as it has low-level +access to the machine implementation. M-mode is used to manage secure +execution environments on RISC-V. User-mode (U-mode) and +supervisor-mode (S-mode) are intended for conventional application and +operating system usage respectively, while hypervisor-mode (H-mode) is +intended to support virtual machine monitors. + +Each privilege level has a core set of privileged ISA extensions with +optional extensions and variants. For example, machine-mode supports +several optional standard variants for address translation and memory +protection. + +\begin{commentary} +Although none are currently defined, future hypervisor-level ISA +extensions will be added to improve virtualization performance. One +common feature to support hypervisors is to provide a second level of +translation and protection, from {\em supervisor physical addresses} +to {\em hypervisor physical addresses}. +\end{commentary} + +Implementations might provide anywhere from 1 to 4 privilege modes +trading off reduced isolation for lower implementation cost, as shown +in Table~\ref{privcombs}. 
+ +\begin{commentary} +In the description, we try to separate the {\em privilege level} for +which code is written, from the {\em privilege mode} in which it runs, +although the two are often tied. For example, a supervisor-level +operating system can run in supervisor-mode on a system with three +privilege modes, but can also run in user-mode under a classic virtual +machine monitor on systems with two or more privilege modes. In both +cases, the same supervisor-level operating system binary code can be +used, coded to a supervisor-level SBI and hence expecting to be able +to use supervisor-level privileged instructions and CSRs. When +running a guest OS in user mode, all supervisor-level actions will be +trapped and emulated by the SEE running in the higher-privilege level. +\end{commentary} + +\begin{table*}[h!] +\begin{center} +\begin{tabular}{|c|l|l|} + \hline + Number of levels & Supported Modes & Intended Usage \\ \hline + 1 & M & Simple embedded systems \\ + 2 & M, U & Secure embedded systems \\ + 3 & M, S, U & Systems running Unix-like operating systems \\ + 4 & M, H, S, U & Systems running Type-1 hypervisors \\ + \hline + \end{tabular} +\end{center} +\caption{Supported combinations of privilege modes.} +\label{privcombs} +\end{table*} + +All hardware implementations must provide M-mode, as this is the only +mode that has unfettered access to the whole machine. The simplest +RISC-V implementations may provide only M-mode, though this will +provide no protection against incorrect or malicious application code. +Many RISC-V implementations will also support at least user mode +(U-mode) to protect the rest of the system from application code. +Supervisor mode (S-mode) can be added to provide isolation between a +supervisor-level operating system and the SEE and HAL code. The +hypervisor mode (H-mode) is intended to provide isolation between a +virtual machine monitor and a HEE and HAL running in machine mode. 
+ +A hart normally runs application code in U-mode until some trap (e.g., +a supervisor call or a timer interrupt) forces a switch to a trap +handler, which usually runs in a more privileged mode. The hart will +then execute the trap handler, which will eventually resume execution +at or after the original trapped instruction in U-mode. Traps that +increase privilege level are termed {\em vertical} traps, while traps +that remain at the same privilege level are termed {\em horizontal} +traps. The RISC-V privileged architecture provides flexible routing +of traps to different privilege layers. + +\begin{commentary} +Horizontal traps can be implemented as vertical traps that +return control to a horizontal trap handler in the less-privileged mode. +\end{commentary} + +\section{Debug Mode} + +Implementations may also include a debug mode to support off-chip +debugging and/or manufacturing test. Debug mode (D-mode) can be +considered an additional privilege mode, with even more access than +M-mode. The separate debug specification proposal describes operation +of a RISC-V hart in debug mode. Debug mode reserves a few CSR +addresses that are only accessible in D-mode, and may also reserve +some portions of the physical memory space on a platform. diff --git a/src/priv-preface.tex b/src/priv-preface.tex new file mode 100644 index 0000000..bc07a43 --- /dev/null +++ b/src/priv-preface.tex @@ -0,0 +1,20 @@ +\chapter{Preface} + +This is version 1.9.1 of the RISC-V privileged architecture +proposal. Changes from version 1.9 include: + +\begin{itemize} + \parskip 0pt + \itemsep 1pt +\item Numerous additions and improvements to the commentary sections. +\item Changed configuration string proposal to use a search process + that supports various formats including Device Tree String and + flattened Device Tree. +\item Made {\tt misa} optionally writable to support modifying base + and supported ISA extensions. CSR address of {\tt misa} changed.
+\item Added description of debug mode and debug CSRs. +\item Added a hardware performance monitoring scheme. Simplified the + handling of existing hardware counters, removing privileged versions + of the counters and the corresponding delta registers. +\item Fixed description of SPIE in presence of user-level interrupts. +\end{itemize} diff --git a/src/q.tex b/src/q.tex new file mode 100644 index 0000000..2830cd3 --- /dev/null +++ b/src/q.tex @@ -0,0 +1,305 @@ +\chapter{``Q'' Standard Extension for Quad-Precision Floating-Point, + Version 2.0} + +This chapter describes the Q standard extension for 128-bit binary +floating-point instructions compliant with the IEEE 754-2008 +arithmetic standard. The 128-bit or quad-precision binary +floating-point instruction subset is named ``Q'', and requires +RV64IFD. The floating-point registers are now extended to hold either +a single, double, or quad-precision floating-point value (FLEN=128). + +\section{Quad-Precision Load and Store Instructions} + +New 128-bit variants of LOAD-FP and STORE-FP instructions are added, +encoded with a new value for the funct3 width field. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{M@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{width} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +offset[11:0] & base & Q & dest & LOAD-FP \\ +\end{tabular} +\end{center} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{O@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:5]} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{width} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{opcode} \\ +\hline +7 & 5 & 5 & 3 & 5 & 7 \\ +offset[11:5] & src & base & Q & offset[4:0] & STORE-FP \\ +\end{tabular} +\end{center} + +If a floating-point register holds a single-precision or +double-precision value, it is guaranteed that a FSQ of that register +will place a value into memory that when reloaded with a FLQ will +recreate the original value in a register. The data format that is +stored in memory is undefined beyond having this property. + +FLQ and FSQ are only guaranteed to execute atomically if the effective address +is naturally aligned and XLEN=128. + +\section{Quad-Precision Computational Instructions} + +A new supported format is added to the format field of most +instructions, as shown in Table~\ref{tab:fpextfmt}. 
+ +\begin{table}[htp] +\begin{center} +\begin{tabular}{|c|c|l|} +\hline +{\em fmt} field & +Mnemonic & +Meaning \\ +\hline +00 & S & 32-bit single-precision \\ +01 & D & 64-bit double-precision \\ +10 & - & {\em reserved} \\ +11 & Q & 128-bit quad-precision \\ +\hline +\end{tabular} +\end{center} +\caption{Format field encoding.} +\label{tab:fpextfmt} +\end{table} + +The quad-precision floating-point computational instructions are +defined analogously to their double-precision counterparts, but operate on +quad-precision operands and produce quad-precision results. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FADD/FSUB & Q & src2 & src1 & RM & dest & OP-FP \\ +FMUL/FDIV & Q & src2 & src1 & RM & dest & OP-FP \\ +FMIN-MAX & Q & src2 & src1 & MIN/MAX & dest & OP-FP \\ +FSQRT & Q & 0 & src & RM & dest & OP-FP \\ +\end{tabular} +\end{center} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{rs3} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +src3 & Q & src2 & src1 & RM & dest & F[N]MADD/F[N]MSUB \\ +\end{tabular} +\end{center} + +\section{Quad-Precision Convert and Move Instructions} + +\vspace{-0.2in} +\begin{center} 
+\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCVT.{\em int}.{\em fmt} & Q & W[U]/L[U] & src & RM & dest & OP-FP \\ +FCVT.{\em fmt}.{\em int} & Q & W[U]/L[U] & src & RM & dest & OP-FP \\ +\end{tabular} +\end{center} + +New floating-point to floating-point conversion instructions FCVT.S.Q, +FCVT.Q.S, FCVT.D.Q, FCVT.Q.D are added. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCVT.{\em fmt}.{\em fmt} & S & Q & src & RM & dest & OP-FP \\ +FCVT.{\em fmt}.{\em fmt} & Q & S & src & RM & dest & OP-FP \\ +FCVT.{\em fmt}.{\em fmt} & D & Q & src & RM & dest & OP-FP \\ +FCVT.{\em fmt}.{\em fmt} & Q & D & src & RM & dest & OP-FP \\ +\end{tabular} +\end{center} + +Floating-point to floating-point sign-injection instructions, FSGNJ.Q, +FSGNJN.Q, and FSGNJX.Q are defined analogously to the double-precision +sign-injection instruction. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FSGNJ & Q & src2 & src1 & J[N]/JX & dest & OP-FP \\ +\end{tabular} +\end{center} + +FMV.X.Q and FMV.Q.X instructions are not provided, so quad-precision bit +patterns must be moved to the integer registers via memory. + +\begin{commentary} +RV128 supports FMV.X.Q and FMV.Q.X in the Q extension. +\end{commentary} + +\section{Quad-Precision Floating-Point Compare Instructions} + +Floating-point compare instructions perform the specified comparison (equal, +less than, or less than or equal) between floating-point registers {\em rs1} +and {\em rs2} and record the Boolean result in integer register {\em rd}. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{S@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCMP & Q & src2 & src1 & EQ/LT/LE & dest & OP-FP \\ +\end{tabular} +\end{center} + +\section{Quad-Precision Floating-Point Classify Instruction} + +The quad-precision floating-point classify instruction, FCLASS.Q, is +defined analogously to its double-precision counterpart, but operates on +quad-precision operands. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{S@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCLASS & Q & 0 & src & 001 & dest & OP-FP \\ +\end{tabular} +\end{center} diff --git a/src/riscv-privileged.tex b/src/riscv-privileged.tex new file mode 100644 index 0000000..7157739 --- /dev/null +++ b/src/riscv-privileged.tex @@ -0,0 +1,67 @@ +%======================================================================= +% riscv-privileged.tex +%----------------------------------------------------------------------- + +\documentclass[twoside,11pt]{book} + +\input{preamble} + +\newcommand{\privrev}{1.9.2-draft} + +\begin{document} + +\title{{\vspace{-0.7in}\Large {\bf The RISC-V Instruction Set Manual}} \\ + \large {\bf Volume II: Privileged Architecture} \\ + Privileged Architecture Version \privrev \\ + Document Version \privrev \\ + {\bf Warning! 
This draft specification + will change before being accepted as standard by the RISC-V Foundation, so + implementations made to this draft specification will likely not conform + to the future standard.} + \vspace{-0.1in}} + +\author{Andrew Waterman$^{1}$, Yunsup Lee$^{1}$, Rimas Avi\v{z}ienis$^{2}$, David Patterson$^{2}$, Krste + Asanovi\'{c}$^{1,2}$ \\ + $^{1}$SiFive Inc., \\ + $^{2}$CS Division, EECS Department, University of California, Berkeley \\ + {\tt \{andrew|yunsup\}@sifive.com, + \{rimas|pattrsn|krste\}@eecs.berkeley.edu} \\ + \today + \\ + \\ + An earlier version of this document is also available as Technical Report + \href{http://www.eecs.berkeley.edu/Pubs/TechRpts/2016/EECS-2016-161.html} + {\color{blue} UCB/EECS-2016-161}. \\ + } + +\date{} +\maketitle + +\markboth{\privrev: Volume II: RISC-V Privileged Architectures} +{Copyright (c) 2010--2016, The Regents of the University of California. All rights reserved.} +\thispagestyle{empty} + +\frontmatter + +\input{priv-preface} + +\tableofcontents + +\mainmatter + +\input{priv-intro} +\input{priv-csrs} +\input{machine} +\input{supervisor} +\input{hypervisor} +\input{priv-insns} + +\input{plic} +\input{cfgstr} +\input{sbi} +\input{priv-history} + +\bibliographystyle{plain} +\bibliography{riscv-spec} + +\end{document} diff --git a/src/riscv-spec.bib b/src/riscv-spec.bib new file mode 100644 index 0000000..157617b --- /dev/null +++ b/src/riscv-spec.bib @@ -0,0 +1,468 @@ +@Misc{ieee754-2008, + key = "{IEEE}", + title = "{ANSI/IEEE Std 754-2008}, {IEEE} standard for + floating-point arithmetic", + publisher = {"Institute of Electrical and Electronics Engineers"}, + year = 2008 +} + +@inproceedings{riscI-isca1981, + title = {{RISC I}: {A} Reduced Instruction Set {VLSI} Computer}, + author = {David A. Patterson and Carlo H. S\'{e}quin}, + booktitle = {ISCA}, + location = {Minneapolis, Minnesota, USA}, + pages = {443-458}, + year = {1981} +} + +@InProceedings{Katevenis:1983, + author = {Katevenis, Manolis G.H.
and Sherburne,Jr., Robert W. and Patterson, David A. and S{\'e}quin, Carlo H.}, + title = {The {RISC II} micro-architecture}, + booktitle = {Proceedings VLSI 83 Conference}, + year = 1983, + month = {August}} + +@article{Katevenis:1984, + author = {Katevenis, Manolis G.H. and Sherburne,Jr., Robert W. and Patterson, David A. and S{\'e}quin, Carlo H.}, + title = {The {RISC II} micro-architecture}, + journal = {Advances in VLSI and Computer Systems}, + issue_date = {Fall 1984}, + volume = {1}, + number = {2}, + month = October, + year = {1984}, + pages = {138--152}, + publisher = {Computer Science Press, Inc.}, + address = {New York, NY, USA}, +} + +@inproceedings{Ungar:1984, + author = {David Ungar and Ricki Blau and Peter Foley and Dain Samples + and David Patterson}, + title = {Architecture of {SOAR}: {Smalltalk} on a {RISC}}, + booktitle = {ISCA}, + address = {Ann Arbor, MI}, + year = {1984}, + pages = {188--197} +} + +@Article{spur-jsscc1989, + author = {David D. Lee and Shing I. Kong and Mark D. Hill and + George S. Taylor and David A. Hodges and Randy + H. Katz and David A. Patterson}, + title = {A {VLSI} Chip Set for a Multiprocessor + Workstation--{Part I}: An {RISC} Microprocessor with + Coprocessor Interface and Support for Symbolic + Processing}, + journal = {IEEE JSSC}, + year = 1989, + volume = 24, + number = 6, + pages = {1688--1698}, + month = {December}} + +@MastersThesis{waterman-ms, + author = {Andrew Waterman}, + title = {{Improving Energy Efficiency and Reducing Code Size with RISC-V Compressed}}, + school = {University of California, Berkeley}, + year = 2011, + Number = {UCB/EECS-2011-63}, +} + +@phdthesis{waterman-phd, + Author = {Waterman, Andrew}, + Title = {Design of the {RISC-V} Instruction Set Architecture}, + School = {University of California, Berkeley}, + Year = {2016}, + Number = {UCB/EECS-2016-1}, +} + +@TechReport{riscvtr, + author = {Andrew Waterman and Yunsup Lee and David A.
Patterson and Krste Asanovi\'{c}}, + title = {The {RISC-V} Instruction Set Manual, {Volume I}: {Base} + User-Level {ISA}}, + institution = {EECS Department, University of California, Berkeley}, + year = 2011, + number = {UCB/EECS-2011-62}, + month = {May}} + + + +@Book{kane:mips:1991, + author = {G. Kane and J. Heinrich}, + title = {MIPS RISC Architecture}, + publisher = {Prentice Hall}, + month = {September}, + year = 1991, + note = {ISBN 0135904722}, + edition = {2nd} +} + +@book{patterson:undergrad:2008, + author = {D. A. Patterson and J. L. Hennessy}, + title = {Computer Organization and Design: The + Hardware/Software Interface}, + edition = {4th}, + publisher = {Morgan Kaufmann}, + month = {November}, + year = {2008}, + note = {ISBN 0123744938} +} + +@Book{sweetman:mips:2006, + author = {D. Sweetman}, + title = {See {MIPS} Run}, + edition = {2nd}, + publisher = {Morgan Kaufmann}, + year = {2006}, + month = {October}, + note = {ISBN 0120884216} +} + +@Misc{mips:arch:2010, + author = {MIPS Technologies Inc.}, + title = {{MIPS32} Architecture for Programmers}, + year = {2010}, + note = {\verb!http://www.mips.com/products/architectures/mips32/!} +} + +@Misc{sgi:mipspro:1997, + author = {Silicon Graphics Inc.}, + title = {{MIPSpro} 64-{B}it Porting and Translation Guide}, + year = {1997}, + note = {\verb!http://techpubs.sgi.com/!} +} + +@Misc{openriscarch, + author = {OpenCores}, + title = {{OpenRISC} 1000 Architecture Manual, Architecture + Version 1.0}, + month = {December}, + year = 2012} + +@ARTICLE{tremblay-vis-ieeemicro1996, +author={Tremblay, M. and O'Connor, J.M. and Narayanan, V. 
and Liang He}, +journal={IEEE Micro}, +title={{VIS} speeds new media processing}, +year={1996}, +month=AUG, +volume={16}, +number={4}, +pages={10 -20}, +keywords={3D graphics environments;RISC-style instructions;UltraSparc;VIS;Visual Instruction Set;media processing;media-processing algorithms;computer graphics;instruction sets;reduced instruction set computing;}, +ISSN={0272-1732},} + +@ARTICLE{lee-max-ieeemicro1996, +author={Lee, R.B.}, +journal={IEEE Micro}, +title={Subword parallelism with {MAX-2}}, +year={1996}, +month=AUG, +volume={16}, +number={4}, +pages={51 -59}, +keywords={MAX-2;instruction extensions;media processing;parallel computation;subword parallelism;word-oriented general-purpose processor;instruction sets;multimedia computing;parallel processing;}, +ISSN={0272-1732},} + +@ARTICLE{peleg-mmx-ieeemicro1996, +author={Peleg, A. and Weiser, U.}, +journal={IEEE Micro}, +title={{MMX} technology extension to the {Intel} architecture}, +year={1996}, +month=AUG, +volume={16}, +number={4}, +pages={42 -50}, +keywords={Intel architecture;MMX;SIMD;communications;compatibility;multimedia;operating systems;microprocessor chips;parallel architectures;}, +ISSN={0272-1732},} + +@ARTICLE{raman-sse-ieeemicro2000, +author={Raman, S.K. and Pentkovski, V. and Keshava, J.}, +journal={IEEE Micro}, +title={Implementing streaming {SIMD} extensions on the {Pentium}-{III} processor }, +year={2000}, +month=JUL/AUG, +volume={20}, +number={4}, +pages={47 -57}, +keywords={Internet;Pentium III developers;demanding multimedia;die size constraints;streaming SIMD extensions;instruction sets;microprocessor chips;}, +ISSN={0272-1732},} + +@misc{lomont-avx-irm2011, +author={Chris Lomont}, +title = {Introduction to {Intel Advanced Vector Extensions}}, +howpublished = {Intel White Paper}, +year = {2011}, +} + +@ARTICLE{goodacre-armisa-computer2005, +author={Goodacre, J. 
and Sloss, A.N.}, +journal={Computer}, +title={Parallelism and the {ARM} instruction set architecture}, +year={2005}, +month=JULY, +volume={38}, +number={7}, +pages={ 42 - 50}, +keywords={ ARM RISC processor; ARM chip design; ARM instruction set architecture; digital signal processor-like operations; exception handling; multiprocessing; reduced-instruction-set computing; subword parallelism; thread-level parallelism; variable execution time; instruction sets; microprocessor chips; parallel architectures; parallel programming; reduced instruction set computing;}, +ISSN={0018-9162},} + +@ARTICLE{diefendorff-altivec-ieeemicro2000, +author={Diefendorff, K. and Dubey, P.K. and Hochsprung, R. and Scale, H.}, +journal={IEEE Micro}, +title={{AltiVec} extension to {PowerPC} accelerates media processing}, +year={2000}, +month=MAR/APR, +volume={20}, +number={2}, +pages={85 -95}, +keywords={2D image processing;3D graphics;AltiVec extension;Apple G4;Hewlett-Packard added MAX;MDMX;MIPS architecture;MMX;Motorola's MPC 7400;PA-RISC architecture;PowerPC;PowerPC's AltiVec;SSE;Silicon Graphics;Sun enhanced Sparc;alias KNI;handwriting recognition;media mining;media processing;multimedia technologies;narrow/broadband signal processing;personal computing;digital signal processing chips;handwriting recognition;multimedia systems;parallel architectures;}, +ISSN={0272-1732},} + +@misc{gwennap-mdmx-mpr1996, +author={Linley Gwennap}, +title={Digital, {MIPS} Add Multimedia Extensions}, +howpublished = {Microprocessor Report}, +year = {1996}, +} +@article{majc, + author = {Tremblay, Marc and Chan, Jeffrey and Chaudhry, Shailender and Conigliaro, Andrew W. 
and Tse, Shing Sheung}, + title = {The {MAJC} Architecture: {A} Synthesis of Parallelism and Scalability}, + journal = {IEEE Micro}, + issue_date = {November 2000}, + volume = {20}, + number = {6}, + month = November, + year = {2000}, + pages = {12--25}, + publisher = {IEEE Computer Society Press}, + address = {Los Alamitos, CA, USA}, +} + +@InProceedings{tx2, + author = {John M. Frankovich and H. Philip Peterson}, + title = {A functional description of the {Lincoln} {TX-2} computer}, + booktitle = {Western Joint Computer Conference}, + year = 1957, + address = {Los Angeles, CA}, + month = {February} +} + + +@TechReport{heil-tr1996, + author = {Timothy H. Heil and James E. Smith}, + title = {Selective Dual Path Execution}, + institution = {University of Wisconsin - Madison}, + year = 1996, + month = {November}} + +@inproceedings{Klauser-1998, + author = {Klauser, A. and Austin, T. and Grunwald, D. and Calder, B.}, + title = {Dynamic Hammock Predication for Non-Predicated Instruction Set Architectures}, + booktitle = {Proceedings of the 1998 International Conference on Parallel Architectures and Compilation Techniques}, + series = {PACT '98}, + year = {1998}, + address = {Washington, DC, USA}, +} + +@inproceedings{Kim-micro2005, + author = {Kim, Hyesoon and Mutlu, Onur and Stark, Jared and Patt, Yale N.}, + title = {Wish Branches: Combining Conditional Branching and Predication for Adaptive Predicated Execution}, + booktitle = {Proceedings of the 38th annual IEEE/ACM International Symposium on Microarchitecture}, + series = {MICRO 38}, + year = {2005}, + location = {Barcelona, Spain}, + pages = {43--54}, +} + +@INPROCEEDINGS{Gharachorloo90memoryconsistency, + author = {Kourosh Gharachorloo and Daniel Lenoski and James Laudon + and Phillip Gibbons and Anoop Gupta and John + Hennessy}, + title = {Memory Consistency and Event Ordering in Scalable + Shared-Memory Multiprocessors}, + booktitle = {In Proceedings of the 17th Annual International + Symposium on Computer 
Architecture}, + year = {1990}, + pages = {15--26} +} + + +@inproceedings{Rajwar:2001:SLE, + author = {Rajwar, Ravi and Goodman, James R.}, + title = {Speculative lock elision: enabling highly concurrent multithreaded execution}, + booktitle = {Proceedings of the 34th annual ACM/IEEE International Symposium on Microarchitecture}, + series = {MICRO 34}, + year = {2001}, + location = {Austin, Texas}, + pages = {294--305}, + publisher = {IEEE Computer Society}, +} + +@Misc{sparcieee1994, + title = {{IEEE} Standard for a 32-bit microprocessor}, + howpublished = {IEEE Std. 1754-1994}, + year = 1994} + + +@Book{parisckane1995, + author = {Gerry Kane}, + title = {PA-RISC 2.0 Architecture}, + publisher = {Prentice Hall}, + year = 1995, + month = {December}, + note = {ISBN 978-0131827349}} + +@article{ibmpower7, + title={{IBM} {POWER7} multicore server processor}, + author={Sinharoy, Balaram and Kalla, R. and Starke, W. J. and Le, + H. Q. and Cargnoni, R. and Van Norstrand, J. A. and + Ronchetti, B. J. and Stuecheli, J. and Leenstra, + J. and Guthrie, G. L. and Nguyen, D. Q. and Blaner, + B. and Marino, C. F. and Retter, E. and Williams, P.}, + journal={IBM Journal of Research and Development}, + volume={55}, + number={3}, + pages={1--1}, + year={2011}, + publisher={IBM} +} + +@article{virtio, + author = {Russell, Rusty}, + title = {Virtio: {Towards} a De-facto Standard for Virtual {I/O} Devices}, + journal = {SIGOPS Oper. Syst. 
Rev.}, + issue_date = {July 2008}, + volume = {42}, + number = {5}, + month = jul, + year = {2008}, + issn = {0163-5980}, + pages = {95--103}, + numpages = {9}, + publisher = {ACM}, + address = {New York, NY, USA}, +} + +@ARTICLE{goldbergvm, +author={Goldberg, Robert P.}, +journal={Computer}, +title={Survey of virtual machine research}, +year={1974}, +month={June}, +volume={7}, +number={6}, +pages={34-45} +} + +@Manual{alphapalcode, + title = {{PALcode} for {Alpha} microprocessors: System Design + Guide}, + organization = {Digital Equipment Corporation}, + address = {Maynard, Massachusetts}, + note = {EC-QFGLC-TE}, + month = {May}, + year = 1996} + +@article{transparent-superpages, + author = {Navarro, Juan and Iyer, Sitaram and Druschel, Peter and Cox, Alan}, + title = {Practical, Transparent Operating System Support for Superpages}, + journal = {SIGOPS Oper. Syst. Rev.}, + issue_date = {Winter 2002}, + volume = {36}, + number = {SI}, + month = dec, + year = {2002}, + issn = {0163-5980}, + pages = {89--104}, + numpages = {16}, + url = {http://doi.acm.org/10.1145/844128.844138}, + doi = {10.1145/844128.844138}, + acmid = {844138}, + publisher = {ACM}, + address = {New York, NY, USA}, +} + +@Book{stretch, + editor = {Werner Buchholz}, + title = {Planning a computer system: {Project} {Stretch}}, + publisher = {McGraw-Hill Book Company}, + year = 1962} + +@Article{ibm360, + author = {G. M. Amdahl and G. A. Blaauw and F. P. Brooks, Jr.}, + title = {Architecture of the {IBM} {System/360}}, + journal = {IBM Journal of R.
\& D.}, + year = 1964, + volume = 8, + number = 2 +} + +@inproceedings{cdc6600, + author = {Thornton, James E.}, + title = {Parallel Operation in the {Control Data 6600}}, + booktitle = {Proceedings of the October 27-29, 1964, Fall Joint Computer Conference, Part II: Very High Speed Computer Systems}, + series = {AFIPS '64 (Fall, part II)}, + year = {1965}, + location = {San Francisco, California}, + pages = {33--40} +} + +@InProceedings{jtseng:sbbci, + author = {J. Tseng and K. Asanovi\'c}, + title = {Energy-Efficient Register Access}, + booktitle = {Proc. of the 13th Symposium on Integrated Circuits and + Systems Design}, + address = {Manaus, Brazil}, + month = {September}, + year = 2000, + pages = "377--384" +} + +@TechReport{riscvtr2, + author = {Andrew Waterman and Yunsup Lee and David A. Patterson and Krste Asanovi\'{c}}, + title = {The {RISC-V} Instruction Set Manual, {Volume I}: {Base} + User-Level {ISA} Version 2.0}, + institution = {EECS Department, University of California, Berkeley}, + year = 2014, + number = {UCB/EECS-2014-54}, + month = {May}} + +@Article{ibm370varch, + author = {W. Buchholz}, + title = "{The IBM System/370 vector architecture}", + journal = {IBM Systems Journal}, + year = 1986, + volume = 25, + number = 1, + pages = {51--62} +} + +@PhdThesis{krstephd, + author = {Krste Asanovi\'c}, + title = {Vector Microprocessors}, + school = {University of California at Berkeley}, + year = 1998, + month = {May}, + note = {Available as techreport UCB/CSD-98-1014} +} + +@InProceedings{vp200, + author = "Kenichi Miura and Keiichiro Uchida", + title = "{FACOM Vector Processor System: VP-100/VP-200}", + editor = "Kawalik", + volume = "F7", + booktitle = "Proceedings of NATO Advanced Research Workshop on + High Speed Computing", + year = 1984, + publisher = "Springer-Verlag", + note = "Also in: IEEE Tutorial Supercomputers: Design and + Applications. 
Kai Hwang (editor), pp. 59--73" +} +@Manual{crayx1asm, + title = {Cray Assembly Language {(CAL)} for {Cray} {X1} Systems Reference Manual}, + organization = {Cray Inc.}, + edition = {1.1}, + month = {June}, + year = 2003} +} diff --git a/src/riscv-spec.tex b/src/riscv-spec.tex new file mode 100644 index 0000000..eff7333 --- /dev/null +++ b/src/riscv-spec.tex @@ -0,0 +1,72 @@ +%======================================================================= +% riscv-spec.tex +%----------------------------------------------------------------------- + +\documentclass[twoside,11pt]{book} + +\input{preamble} + +\newcommand{\specrev}{2.2-draft} + +\begin{document} + +\title{\vspace{-0.7in}\Large {\bf The RISC-V Instruction Set Manual} \\ + \large {\bf Volume I: User-Level ISA} \\ + Document Version \specrev + \vspace{-0.1in}} + +\author{Andrew Waterman, Yunsup Lee, David Patterson, Krste + Asanovi\'{c} \\ + CS Division, EECS Department, University of California, Berkeley \\ + {\tt \{waterman|yunsup|pattrsn|krste\}@eecs.berkeley.edu} \\ + \today + \\ + \\ + \\ + An earlier version of this document is also available as Technical Report + \href{http://www.eecs.berkeley.edu/Pubs/TechRpts/2016/EECS-2016-118.html} + {\color{blue} UCB/EECS-2016-118}. \\ + } + +\date{} +\maketitle + +\markboth{Volume I: RISC-V User-Level ISA V\specrev} +{Copyright \copyright\,2010--2016, The Regents of the University of California.
All rights reserved.} +\thispagestyle{empty} + +\frontmatter + +\input{preface} + +\tableofcontents + +\mainmatter + +\input{intro} +\input{rv32} +\input{rv32e} +\input{rv64} +\input{m} +\input{a} +\input{f} +\input{d} +\input{gmaps} +\input{extensions} +\input{naming} +\input{q} +\input{l} +\input{c} +\input{v} +\input{b} +\input{t} +\input{p} +\input{rv128} +\input{calling} +\input{assembly} +\input{history} + +\bibliographystyle{plain} +\bibliography{riscv-spec} + +\end{document} diff --git a/src/rv128.tex b/src/rv128.tex new file mode 100644 index 0000000..17075d0 --- /dev/null +++ b/src/rv128.tex @@ -0,0 +1,64 @@ +\chapter{RV128I Base Integer Instruction Set, Version 1.7} +\label{rv128} + +\begin{quote} +{\em ``There is only one mistake that can be made in computer design that is +difficult to recover from---not having enough address bits for memory +addressing and memory management.''} Bell and Strecker, ISCA-3, 1976. +\end{quote} + +This chapter describes RV128I, a variant of the RISC-V ISA +supporting a flat 128-bit address space. The variant is a +straightforward extrapolation of the existing RV32I and RV64I designs. + +\begin{commentary} +The primary reason to extend integer register width is to support +larger address spaces. It is not clear when a flat address space larger +than 64 bits will be required. At the time of writing, the fastest +supercomputer in the world as measured by the Top500 benchmark had +over \wunits{1}{PB} of DRAM, and would require over 50 bits of address +space if all the DRAM resided in a single address space. Some +warehouse-scale computers already contain even larger quantities of +DRAM, and new dense solid-state non-volatile memories and fast +interconnect technologies might drive a demand for even larger memory +spaces. Exascale systems research is targeting \wunits{100}{PB} +memory systems, which occupy 57 bits of address space. 
At historic +rates of growth, it is possible that greater than 64 bits of address +space might be required before 2030. + +History suggests that whenever it becomes clear that more than 64 bits +of address space is needed, architects will repeat intensive debates +about alternatives to extending the address space, including +segmentation, 96-bit address spaces, and software workarounds, until, +finally, flat 128-bit address spaces will be adopted as the simplest +and best solution. + +We have not frozen the RV128 spec at this time, as there might be need +to evolve the design based on actual usage of 128-bit address spaces. +\end{commentary} + +RV128I builds upon RV64I in the same way RV64I builds upon RV32I, with +integer registers extended to 128 bits (i.e., XLEN=128). Most integer +computational instructions are unchanged as they are defined to +operate on XLEN bits. The RV64I ``*W'' integer instructions that +operate on 32-bit values in the low bits of a register are retained, +and a new set of ``*D'' integer instructions that operate on 64-bit +values held in the low bits of the 128-bit integer registers are +added. The ``*D'' instructions consume two major opcodes (OP-IMM-64 +and OP-64) in the standard 32-bit encoding. + +Shifts by an immediate (SLLI/SRLI/SRAI) are now encoded using the low +7 bits of the I-immediate, and variable shifts (SLL/SRL/SRA) use the +low 7 bits of the shift amount source register. + +A LDU (load double unsigned) instruction is added using the existing +LOAD major opcode, along with new LQ and SQ instructions to load and +store quadword values. SQ is added to the STORE major opcode, while +LQ is added to the MISC-MEM major opcode. + +The floating-point instruction set is unchanged, although the 128-bit +Q floating-point extension can now support FMV.X.Q and FMV.Q.X +instructions, together with additional FCVT instructions to and from +the T (128-bit) integer format. 
+ + diff --git a/src/rv32.tex b/src/rv32.tex new file mode 100644 index 0000000..77824e5 --- /dev/null +++ b/src/rv32.tex @@ -0,0 +1,1359 @@ +\chapter{RV32I Base Integer Instruction Set, Version 2.0} +\label{rv32} + +This chapter describes version 2.0 of the RV32I base integer +instruction set. Much of the commentary also applies to the RV64I +variant. + +\begin{commentary} +RV32I was designed to be sufficient to form a compiler target and to +support modern operating system environments. The ISA was also +designed to reduce the hardware required in a minimal implementation. +RV32I contains 47 unique instructions, though a simple implementation +might cover the eight SCALL/SBREAK/CSRR* instructions with a single +SYSTEM hardware instruction that always traps and might be able to +implement the FENCE and FENCE.I instructions as NOPs, reducing +hardware instruction count to 38 total. RV32I can emulate almost any +other ISA extension (except the A extension, which requires additional +hardware support for atomicity). +\end{commentary} + +\section{Programmers' Model for Base Integer Subset} + +Figure~\ref{gprs} shows the user-visible state for the base integer +subset. There are 31 general-purpose registers {\tt x1}--{\tt x31}, +which hold integer values. Register {\tt x0} is hardwired to the +constant 0. There is no hardwired subroutine return address link +register, but the standard software calling convention uses register +{\tt x1} to hold the return address on a call. For RV32, the {\tt x} +registers are 32 bits wide, and for RV64, they are 64 bits wide. This +document uses the term XLEN to refer to the current width of an {\tt + x} register in bits (either 32 or 64). + +There is one additional user-visible register: the program counter {\tt pc} +holds the address of the current instruction. + +\begin{commentary} +The number of available architectural registers can have large impacts +on code size, performance, and energy consumption. 
Although 16 +registers would arguably be sufficient for an integer ISA running +compiled code, it is impossible to encode a complete ISA with 16 +registers in 16-bit instructions using a 3-address format. Although a +2-address format would be possible, it would increase instruction +count and lower efficiency. We wanted to avoid intermediate +instruction sizes (such as Xtensa's 24-bit instructions) to simplify +base hardware implementations, and once a 32-bit instruction size was +adopted, it was straightforward to support 32 integer registers. A +larger number of integer registers also helps performance on +high-performance code, where there can be extensive use of loop +unrolling, software pipelining, and cache tiling. + +For these reasons, we chose a conventional size of 32 integer +registers for the base ISA. Dynamic register usage tends to be +dominated by a few frequently accessed registers, and regfile +implementations can be optimized to reduce access energy for the +frequently accessed registers~\cite{jtseng:sbbci}. The optional +compressed 16-bit instruction format mostly only accesses 8 registers +and hence can provide a dense instruction encoding, while additional +instruction-set extensions could support a much larger register space +(either flat or hierarchical) if desired. + +For resource-constrained embedded applications, we have defined the +RV32E subset, which only has 16 registers (Chapter~\ref{rv32e}). 
+\end{commentary} + +\begin{figure}[H] +{\footnotesize +\begin{center} +\begin{tabular}{p{2in}} +\instbitrange{XLEN-1}{0} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ \ \ x0 / zero}} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ x1\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ x2\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ x3\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ x4\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ x5\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ x6\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ x7\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ x8\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ x9\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x10\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x11\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x12\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x13\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x14\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x15\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x16\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x17\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x18\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x19\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x20\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x21\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x22\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x23\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x24\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x25\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x26\ \ \ \ \ }} \\ 
\cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x27\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x28\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x29\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x30\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ x31\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{c}{XLEN} \\ + +\instbitrange{XLEN-1}{0} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{pc}} \\ \cline{1-1} +\multicolumn{1}{c}{XLEN} \\ +\end{tabular} +\end{center} +} +\caption{RISC-V user-level base integer register state.} +\label{gprs} +\end{figure} + +\newpage + +\section{Base Instruction Formats} + +In the base ISA, there are four core instruction formats (R/I/S/U), as +shown in Figure~\ref{fig:baseinstformats}. All are a fixed 32 bits in +length and must be aligned on a four-byte boundary in memory. An +instruction address misaligned exception is generated on a taken +branch or unconditional jump if the target address is not four-byte +aligned. No instruction address misaligned exception is generated for a +conditional branch that is not taken.
+ +\vspace{-0.2in} +\begin{figure}[h] +\begin{center} +\setlength{\tabcolsep}{4pt} +\begin{tabular}{p{1.2in}@{}p{0.8in}@{}p{0.8in}@{}p{0.6in}@{}p{0.8in}@{}p{1in}l} +\\ +\instbitrange{31}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\cline{1-6} +\multicolumn{1}{|c|}{funct7} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & +R-type \\ +\cline{1-6} +\\ +\cline{1-6} +\multicolumn{2}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & +I-type \\ +\cline{1-6} +\\ +\cline{1-6} +\multicolumn{1}{|c|}{imm[11:5]} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{opcode} & +S-type \\ +\cline{1-6} +\\ +\cline{1-6} +\multicolumn{4}{|c|}{imm[31:12]} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & +U-type \\ +\cline{1-6} +\end{tabular} +\end{center} +\caption{RISC-V base instruction formats.} +\label{fig:baseinstformats} +\end{figure} + +The RISC-V ISA keeps the source ({\em rs1} and {\em rs2}) and +destination ({\em rd}) registers at the same position in all formats +to simplify decoding. Immediates are packed towards the leftmost +available bits in the instruction and have been allocated to reduce +hardware complexity. In particular, the sign bit for all immediates +is always in bit 31 of the instruction to speed sign-extension +circuitry. + +\begin{commentary} +Decoding register specifiers is usually on the critical paths in +implementations, and so the instruction format was chosen to keep all +register specifiers at the same position in all formats at the expense +of having to move immediate bits across formats (a property shared +with RISC-IV aka. SPUR~\cite{spur-jsscc1989}). 
+ +In practice, most immediates are either small or require all XLEN +bits. We chose an asymmetric immediate split (12 bits in regular +instructions plus a special load upper immediate instruction with 20 +bits) to increase the opcode space available for regular instructions. +In addition, these immediates are all sign-extended. We did not +observe a benefit to using zero-extension for some immediates and +wanted to keep the ISA as simple as possible. +\end{commentary} + +\section{Immediate Encoding Variants} + +There are a further two variants of the instruction formats (SB/UJ) +based on the handling of immediates, as shown in +Figure~\ref{fig:baseinstformatsimm}. + +\begin{figure}[h] +\begin{small} +\begin{center} +\setlength{\tabcolsep}{4pt} +\begin{tabular}{p{0.3in}@{}p{0.8in}@{}p{0.6in}@{}p{0.18in}@{}p{0.7in}@{}p{0.6in}@{}p{0.6in}@{}p{0.3in}@{}p{0.5in}l} +\\ +\multicolumn{1}{c}{\instbit{31}} & +\instbitrange{30}{25} & +\instbitrange{24}{21} & +\multicolumn{1}{c}{\instbit{20}} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{8} & +\multicolumn{1}{c}{\instbit{7}} & +\instbitrange{6}{0} \\ +\cline{1-9} +\multicolumn{2}{|c|}{funct7} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{2}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & +R-type \\ +\cline{1-9} +\\ +\cline{1-9} +\multicolumn{4}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{2}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & +I-type \\ +\cline{1-9} +\\ +\cline{1-9} +\multicolumn{2}{|c|}{imm[11:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{2}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{opcode} & +S-type \\ +\cline{1-9} +\\ +\cline{1-9} +\multicolumn{1}{|c|}{imm[12]} & +\multicolumn{1}{c|}{imm[10:5]} & +\multicolumn{2}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{imm[4:1]} & +\multicolumn{1}{c|}{imm[11]} 
& +\multicolumn{1}{c|}{opcode} & +SB-type \\ +\cline{1-9} +\\ +\cline{1-9} +\multicolumn{6}{|c|}{imm[31:12]} & +\multicolumn{2}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & +U-type \\ +\cline{1-9} +\\ +\cline{1-9} +\multicolumn{1}{|c|}{imm[20]} & +\multicolumn{2}{c|}{imm[10:1]} & +\multicolumn{1}{c|}{imm[11]} & +\multicolumn{2}{c|}{imm[19:12]} & +\multicolumn{2}{c|}{rd} & +\multicolumn{1}{c|}{opcode} & +UJ-type \\ +\cline{1-9} +\end{tabular} +\end{center} +\end{small} +\caption{RISC-V base instruction formats showing immediate variants.} +\label{fig:baseinstformatsimm} +\end{figure} + +In Figure~\ref{fig:baseinstformatsimm} each immediate +subfield is labeled with the bit position (imm[{\em x}\,]) in the +immediate value being produced, rather than the bit position within +the instruction's immediate field as is usually done. +Figure~\ref{fig:immtypes} shows the immediates produced by each of the +base instruction formats, and is labeled to show which instruction +bit (inst[{\em y}\,]) produces each bit of the immediate value. 
+ +\begin{figure}[h] +\begin{center} +\setlength{\tabcolsep}{4pt} +\begin{tabular}{p{0.2in}@{}p{1.2in}@{}p{1.0in}@{}p{0.2in}@{}p{0.7in}@{}p{0.7in}@{}p{0.2in}l} +\\ +\multicolumn{1}{c}{\instbit{31}} & +\instbitrange{30}{20} & +\instbitrange{19}{12} & +\multicolumn{1}{c}{\instbit{11}} & +\instbitrange{10}{5} & +\instbitrange{4}{1} & +\multicolumn{1}{c}{\instbit{0}} & +\\ +\cline{1-7} +\multicolumn{4}{|c|}{--- inst[31] ---} & +\multicolumn{1}{c|}{inst[30:25]} & +\multicolumn{1}{c|}{inst[24:21]} & +\multicolumn{1}{c|}{inst[20]} & +I-immediate \\ +\cline{1-7} +\\ +\cline{1-7} +\multicolumn{4}{|c|}{--- inst[31] ---} & +\multicolumn{1}{c|}{inst[30:25]} & +\multicolumn{1}{c|}{inst[11:8]} & +\multicolumn{1}{c|}{inst[7]} & +S-immediate \\ +\cline{1-7} +\\ +\cline{1-7} +\multicolumn{3}{|c|}{--- inst[31] ---} & +\multicolumn{1}{c|}{inst[7]} & +\multicolumn{1}{c|}{inst[30:25]} & +\multicolumn{1}{c|}{inst[11:8]} & +\multicolumn{1}{c|}{0} & +B-immediate \\ +\cline{1-7} +\\ +\cline{1-7} +\multicolumn{1}{|c|}{inst[31]} & +\multicolumn{1}{c|}{inst[30:20]} & +\multicolumn{1}{c|}{inst[19:12]} & +\multicolumn{4}{c|}{--- 0 ---} & +U-immediate \\ +\cline{1-7} +\\ +\cline{1-7} +\multicolumn{2}{|c|}{--- inst[31] ---} & +\multicolumn{1}{c|}{inst[19:12]} & +\multicolumn{1}{c|}{inst[20]} & +\multicolumn{1}{c|}{inst[30:25]} & +\multicolumn{1}{c|}{inst[24:21]} & +\multicolumn{1}{c|}{0} & +J-immediate \\ +\cline{1-7} +\end{tabular} +\end{center} +\caption{Types of immediate produced by RISC-V instructions. The fields are labeled with the + instruction bits used to construct their value. Sign extension + always uses inst[31].} +\label{fig:immtypes} +\end{figure} + +The only difference between the S and SB formats is that the 12-bit +immediate field is used to encode branch offsets in multiples of 2 in +the SB format. 
Instead of shifting all bits in the +instruction-encoded immediate left by one in hardware as is +conventionally done, the middle bits (imm[10:1]) and sign bit stay in +fixed positions, while the lowest bit in S format (inst[7]) encodes a +high-order bit in SB format. + +Similarly, the only difference between the U and UJ formats is +that the 20-bit immediate is shifted left by 12 bits to form U +immediates and by 1 bit to form J immediates. The location of +instruction bits in the U and UJ format immediates is chosen to +maximize overlap with the other formats and with each other. + +\begin{commentary} +Sign-extension is one of the most critical operations on immediates +(particularly in RV64I), and in RISC-V the sign bit for all immediates +is always held in bit 31 of the instruction to allow sign-extension to +proceed in parallel with instruction decoding. + +Although more complex implementations might have separate adders for +branch and jump calculations and so would not benefit from keeping the +location of immediate bits constant across types of instruction, we +wanted to reduce the hardware cost of the simplest implementations. +By rotating bits in the instruction encoding of B and J immediates +instead of using dynamic hardware muxes to multiply the immediate by +2, we reduce instruction signal fanout and immediate mux costs by +around a factor of 2. The scrambled immediate encoding will add +negligible time to static or ahead-of-time compilation. For dynamic +generation of instructions, there is some small additional +overhead, but the most common short forward branches have +straightforward immediate encodings. +\end{commentary} + +\section{Integer Computational Instructions} + +Most integer computational instructions operate on XLEN bits of values +held in the integer register file. Integer computational instructions +are either encoded as register-immediate operations using the I-type +format or as register-register operations using the R-type format. 
+The destination is register {\em rd} for both register-immediate and +register-register instructions. No integer computational instructions +cause arithmetic exceptions. + +\begin{commentary} +We did not include special instruction set support for overflow checks +on integer arithmetic operations, as many overflow checks can be +cheaply implemented using RISC-V branches. Overflow checking for +unsigned addition requires only a single additional branch instruction +after the addition. Similarly, signed array bounds checking requires +only a single branch instruction. Overflow checks for signed addition +require several instructions depending on whether the addend is an +immediate or a variable. We considered adding branches that test if +the sum of their signed register operands would overflow, but +ultimately chose to omit these from the base ISA. +\end{commentary} + +\subsubsection*{Integer Register-Immediate Instructions} +\vspace{-0.4in} +\begin{center} +\begin{tabular}{M@{}R@{}S@{}R@{}O} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +I-immediate[11:0] & src & ADDI/SLTI[U] & dest & OP-IMM \\ +I-immediate[11:0] & src & ANDI/ORI/XORI & dest & OP-IMM \\ +\end{tabular} +\end{center} +ADDI adds the sign-extended 12-bit immediate to register {\em rs1}. +Arithmetic overflow is ignored and the result is simply the low +XLEN bits of the result. ADDI {\em rd, rs1, 0} is used to implement the +MV {\em rd, rs1} assembler pseudo-instruction. + +SLTI (set less than immediate) places the value 1 in register {\em rd} +if register {\em rs1} is less than the sign-extended immediate when +both are treated as signed numbers, else 0 is written to {\em rd}. 
+SLTIU is similar but compares the values as unsigned numbers (i.e., +the immediate is first sign-extended to XLEN bits then treated as an +unsigned number). Note, SLTIU {\em rd}, {\em rs1}, 1 sets {\em rd} +to 1 if {\em rs1} equals zero, otherwise sets {\em rd} to 0 (assembler +pseudo-op SEQZ {\em rd, rs}). + +ANDI, ORI, XORI are logical operations that perform bitwise AND, OR, +and XOR on register {\em rs1} and the sign-extended 12-bit immediate +and place the result in {\em rd}. Note, XORI {\em rd, rs1, -1} +performs a bitwise logical inversion of register {\em rs1} (assembler +pseudo-instruction NOT {\em rd, rs}). + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{S@{}R@{}R@{}S@{}R@{}O} +\\ +\instbitrange{31}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:5]} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +7 & 5 & 5 & 3 & 5 & 7 \\ +0000000 & shamt[4:0] & src & SLLI & dest & OP-IMM \\ +0000000 & shamt[4:0] & src & SRLI & dest & OP-IMM \\ +0100000 & shamt[4:0] & src & SRAI & dest & OP-IMM \\ +\end{tabular} +\end{center} + +Shifts by a constant are encoded as a specialization of the +I-type format. The operand to be shifted is in {\em rs1}, and the +shift amount is encoded in the lower 5 bits of the I-immediate field. +The right shift type is encoded in a high bit of the I-immediate. +SLLI is a logical left shift (zeros are shifted into the lower bits); +SRLI is a logical right shift (zeros are shifted into the upper bits); +and SRAI is an arithmetic right shift (the original sign bit is copied +into the vacated upper bits). 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{U@{}R@{}O} +\\ +\instbitrange{31}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[31:12]} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +20 & 5 & 7 \\ +U-immediate[31:12] & dest & LUI \\ +U-immediate[31:12] & dest & AUIPC +\end{tabular} +\end{center} + +LUI (load upper immediate) is used to build 32-bit constants and uses +the U-type format. LUI places the U-immediate value in the top 20 +bits of the destination register {\em rd}, filling in the lowest 12 +bits with zeros. + +AUIPC (add upper immediate to {\tt pc}) is used to build {\tt pc}-relative +addresses and uses the U-type format. AUIPC forms a 32-bit offset from the +20-bit U-immediate, filling in the lowest 12 bits with zeros, adds this offset +to the {\tt pc}, then places the result in register {\em rd}. + +\begin{commentary} +The AUIPC instruction supports two-instruction sequences to access +arbitrary offsets from the PC for both control-flow transfers and data +accesses. The combination of an AUIPC and the 12-bit immediate in a +JALR can transfer control to any 32-bit PC-relative address, while an +AUIPC plus the 12-bit immediate offset in regular load or store +instructions can access any 32-bit PC-relative data address. + +The current PC can be obtained by setting the U-immediate to 0. Although +a JAL +4 instruction could also be used to obtain the PC, it might cause +pipeline breaks in simpler microarchitectures or pollute the BTB structures in +more complex microarchitectures. +\end{commentary} + +\subsubsection*{Integer Register-Register Operations} + +RV32I defines several arithmetic R-type operations. All operations +read the {\em rs1} and {\em rs2} registers as source operands and +write the result into register {\em rd}. The {\em funct7} and {\em + funct3} fields select the type of operation. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{S@{}R@{}R@{}S@{}R@{}O} +\\ +\instbitrange{31}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct7} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +7 & 5 & 5 & 3 & 5 & 7 \\ +0000000 & src2 & src1 & ADD/SLT/SLTU & dest & OP \\ +0000000 & src2 & src1 & AND/OR/XOR & dest & OP \\ +0000000 & src2 & src1 & SLL/SRL & dest & OP \\ +0100000 & src2 & src1 & SUB/SRA & dest & OP \\ +\end{tabular} +\end{center} + +ADD and SUB perform addition and subtraction respectively. Overflows +are ignored and the low XLEN bits of results are written to the +destination. SLT and SLTU perform signed and unsigned compares +respectively, writing 1 to {\em rd} if $\mbox{\em rs1} < \mbox{\em + rs2}$, 0 otherwise. Note, SLTU {\em rd}, {\em x0}, {\em rs2} sets +{\em rd} to 1 if {\em rs2} is not equal to zero, otherwise sets {\em + rd} to zero (assembler pseudo-op SNEZ {\em rd, rs}). AND, OR, and +XOR perform bitwise logical operations. + +SLL, SRL, and SRA perform logical left, logical right, and arithmetic +right shifts on the value in register {\em rs1} by the shift amount +held in the lower 5 bits of register {\em rs2}. + +\subsubsection*{NOP Instruction} +\vspace{-0.4in} +\begin{center} +\begin{tabular}{M@{}R@{}S@{}R@{}O} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +0 & 0 & ADDI & 0 & OP-IMM \\ +\end{tabular} +\end{center} + +The NOP instruction does not change any user-visible state, except +for advancing the {\tt pc}. 
NOP is encoded as ADDI {\em x0, x0, 0}. + +\begin{commentary} +NOPs can be used to align code segments to microarchitecturally +significant address boundaries, or to leave space for inline code +modifications. Although there are many possible ways to encode a NOP, +we define a canonical NOP encoding to allow microarchitectural +optimizations as well as for more readable disassembly output. +\end{commentary} + +\section{Control Transfer Instructions} + +RV32I provides two types of control transfer instructions: +unconditional jumps and conditional branches. Control transfer +instructions in RV32I do {\em not} have architecturally visible delay +slots. + +\subsubsection*{Unconditional Jumps} + +\vspace{-0.1in} The jump and link (JAL) instruction uses the UJ-type +format, where the J-immediate encodes a signed offset in multiples of +2 bytes. The offset is sign-extended and added to the {\tt pc} +to form the jump target address. Jumps can therefore target a +$\pm$\wunits{1}{MiB} range. JAL stores the address of the instruction +following the jump ({\tt pc}+4) into register {\em rd}. The standard +software calling convention uses {\tt x1} as the return address +register and {\tt x5} as an alternate link register. + +Plain unconditional jumps (assembler pseudo-op J) are encoded as a JAL +with {\em rd}={\tt x0}. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{W@{}E@{}W@{}R@{}R@{}O} +\\ +\multicolumn{1}{c}{\instbit{31}} & +\instbitrange{30}{21} & +\multicolumn{1}{c}{\instbit{20}} & +\instbitrange{19}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[20]} & +\multicolumn{1}{c|}{imm[10:1]} & +\multicolumn{1}{c|}{imm[11]} & +\multicolumn{1}{c|}{imm[19:12]} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +1 & 10 & \multicolumn{1}{c}{1} & 8 & 5 & 7 \\ +\multicolumn{4}{c}{offset[20:1]} & dest & JAL \\ +\end{tabular} +\end{center} + +The indirect jump instruction JALR (jump and link register) uses the +I-type encoding. The target address is obtained by adding the 12-bit +signed I-immediate to the register {\em rs1}, then setting the +least-significant bit of the result to zero. The address of +the instruction following the jump ({\tt pc}+4) is written to register +{\em rd}. Register {\tt x0} can be used as the destination if the +result is not required. +\vspace{-0.4in} +\begin{center} +\begin{tabular}{M@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +offset[11:0] & base & 0 & dest & JALR \\ +\end{tabular} +\end{center} + +The JAL and JALR instructions can generate a misaligned instruction +fetch exception if the target address is not aligned to a four-byte +boundary. + +\begin{commentary} +The unconditional jump instructions all use PC-relative addressing to +help support position-independent code. The JALR instruction was +defined to enable a two-instruction sequence to jump anywhere in a +32-bit absolute address range. 
A LUI instruction can first load {\em + rs1} with the upper 20 bits of a target address, then JALR can add +in the lower bits. Similarly, AUIPC then JALR can jump +anywhere in a 32-bit {\tt pc}-relative address range. + +Note that the JALR instruction does not treat the 12-bit immediate as +multiples of 2 bytes, unlike the conditional branch instructions. +This avoids one more immediate format in hardware. In +practice, most uses of JALR will have either a zero immediate or be +paired with a LUI or AUIPC, so the slight reduction in range is not +significant. + +The JALR instruction ignores the lowest bit of the calculated target +address. This both simplifies the hardware slightly and allows the +low bit of function pointers to be used to store auxiliary +information. Although there is potentially a slight loss of error +checking in this case, in practice jumps to an incorrect instruction +address will usually quickly raise an exception. + +Instruction fetch misaligned exceptions are not possible on machines +that support extensions with 16-bit aligned instructions, such as the +compressed instruction set extension, C. + +Return-address prediction stacks are a common feature of high-performance +instruction-fetch units. We note that {\em rd} and {\em rs1} can be used to +guide an implementation's instruction-fetch prediction logic, indicating +whether JALR instructions should push ({\em rd}$=${\tt x1}/{\tt x5}), pop +({\em rs1}$=${\tt x1}/{\tt x5}), or not touch (otherwise) +a return-address stack. Similarly, a JAL instruction should push the return +address onto the return-address stack only when {\em rd}$=${\tt x1}/{\tt x5}. + +When used with a base {\em rs1}$=${\tt x0}, JALR can be used to implement +a single instruction subroutine call to the lowest \wunits{2}{KiB} or highest +\wunits{2}{KiB} address region from anywhere in the address space, which could +be used to implement fast calls to a small runtime library. 
+\end{commentary} + +\subsubsection*{Conditional Branches} + +All branch instructions use the SB-type instruction format. The +12-bit B-immediate encodes signed offsets in multiples of 2, and is +added to the current {\tt pc} to give the target address. The +conditional branch range is $\pm$\wunits{4}{KiB}. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{W@{}R@{}F@{}F@{}R@{}R@{}F@{}S} +\\ +\multicolumn{1}{c}{\instbit{31}} & +\instbitrange{30}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{8} & +\multicolumn{1}{c}{\instbit{7}} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[12]} & +\multicolumn{1}{c|}{imm[10:5]} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{imm[4:1]} & +\multicolumn{1}{c|}{imm[11]} & +\multicolumn{1}{c|}{opcode} \\ +\hline +1 & 6 & 5 & 5 & 3 & 4 & 1 & 7 \\ +\multicolumn{2}{c}{offset[12,10:5]} & src2 & src1 & BEQ/BNE & \multicolumn{2}{c}{offset[11,4:1]} & BRANCH \\ +\multicolumn{2}{c}{offset[12,10:5]} & src2 & src1 & BLT[U] & \multicolumn{2}{c}{offset[11,4:1]} & BRANCH \\ +\multicolumn{2}{c}{offset[12,10:5]} & src2 & src1 & BGE[U] & \multicolumn{2}{c}{offset[11,4:1]} & BRANCH \\ +\end{tabular} +\end{center} + +Branch instructions compare two registers. BEQ and BNE take the +branch if registers {\em rs1} and {\em rs2} are equal or unequal +respectively. BLT and BLTU take the branch if {\em rs1} is less than +{\em rs2}, using signed and unsigned comparison respectively. BGE and +BGEU take the branch if {\em rs1} is greater than or equal to {\em rs2}, +using signed and unsigned comparison respectively. Note, BGT, BGTU, +BLE, and BLEU can be synthesized by reversing the operands to BLT, +BLTU, BGE, and BGEU, respectively. + +Software should be optimized such that the sequential code path is the +most common path, with less-frequently taken code paths placed out of +line. 
Software should also assume that backward branches will be +predicted taken and forward branches as not taken, at least the +first time they are encountered. Dynamic predictors should quickly +learn any predictable branch behavior. + +Unlike some other architectures, the RISC-V jump (JAL with {\em + rd}={\tt x0}) instruction should always be used for unconditional +branches instead of a conditional branch instruction with an always-true +condition. RISC-V jumps are also PC-relative and support a much +wider offset range than branches, and will not pressure conditional +branch prediction tables. + +\begin{commentary} +The conditional branches were designed to include arithmetic +comparison operations between two registers (as also done in PA-RISC +and Xtensa ISA), rather than use condition codes (x86, ARM, SPARC, +PowerPC), or to only compare one register against zero (Alpha, MIPS), +or two registers only for equality (MIPS). This design was motivated +by the observation that a combined compare-and-branch instruction fits +into a regular pipeline, avoids additional condition code state or use +of a temporary register, and reduces static code size and dynamic +instruction fetch traffic. Another point is that comparisons against +zero require non-trivial circuit delay (especially after the move to +static logic in advanced processes) and so are almost as expensive as +arithmetic magnitude compares. Another advantage of a fused +compare-and-branch instruction is that branches are observed earlier +in the front-end instruction stream, and so can be predicted earlier. +There is perhaps an advantage to a design with condition codes in the +case where multiple branches can be taken based on the same condition +codes, but we believe this case to be relatively rare. + +We considered but did not include static branch hints in the +instruction encoding. 
These can reduce the pressure on dynamic +predictors, but require more instruction encoding space and +software profiling for best results, and can result in poor +performance if production runs do not match profiling runs. + +We considered but did not include conditional moves or predicated +instructions, which can effectively replace unpredictable short +forward branches. Conditional moves are the simpler of the two, but +are difficult to use with conditional code that might cause exceptions +(memory accesses and floating-point operations). Predication adds +additional flag state to a system, additional instructions to set and +clear flags, and additional encoding overhead on every instruction. +Both conditional move and predicated instructions add complexity to +out-of-order microarchitectures, adding an implicit third source +operand due to the need to copy the original value of the destination +architectural register into the renamed destination physical register +if the predicate is false. Also, static compile-time decisions to use +predication instead of branches can result in lower performance on +inputs not included in the compiler training set, especially given +that unpredictable branches are rare, and becoming rarer as branch +prediction techniques improve. + +We note that various microarchitectural techniques exist to +dynamically convert unpredictable short forward branches into +internally predicated code to avoid the cost of flushing pipelines on +a branch mispredict~\cite{heil-tr1996,Klauser-1998,Kim-micro2005} and +have been implemented in commercial processors~\cite{ibmpower7}. +The simplest techniques just reduce the penalty of recovering from a +mispredicted short forward branch by only flushing instructions in the +branch shadow instead of the entire fetch pipeline, or by fetching +instructions from both sides using wide instruction fetch or idle +instruction fetch slots. 
More complex techniques for out-of-order +cores add internal predicates on instructions in the branch shadow, +with the internal predicate value written by the branch instruction, +allowing the branch and following instructions to be executed +speculatively and out-of-order with respect to other code~\cite{ibmpower7}. +\end{commentary} + +\section{Load and Store Instructions} + +RV32I is a load-store architecture, where only load and store +instructions access memory and arithmetic instructions only operate on +CPU registers. RV32I provides a 32-bit user address space that is +byte-addressed and little-endian. The execution environment will +define what portions of the address space are legal to access. Loads +with a destination of {\tt x0} must still raise any exceptions and +cause any other side effects even though the load value is discarded. + +\vspace{-0.4in} +\begin{center} +\begin{tabular}{M@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +offset[11:0] & base & width & dest & LOAD \\ +\end{tabular} +\end{center} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{O@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:5]} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{opcode} \\ +\hline +7 & 5 & 5 & 3 & 5 & 7 \\ +offset[11:5] & src & base & width & offset[4:0] & STORE \\ +\end{tabular} +\end{center} + +Load and store instructions transfer a value between the registers and +memory.
Loads are encoded in the I-type format and stores are +S-type. The effective byte address is obtained by adding register +{\em rs1} to the sign-extended 12-bit offset. Loads copy a value +from memory to register {\em rd}. Stores copy the value in register +{\em rs2} to memory. + +The LW instruction loads a 32-bit value from memory into {\em rd}. LH +loads a 16-bit value from memory, then sign-extends to 32 bits before +storing in {\em rd}. LHU loads a 16-bit value from memory but then +zero-extends to 32 bits before storing in {\em rd}. LB and LBU are +defined analogously for 8-bit values. The SW, SH, and SB instructions +store 32-bit, 16-bit, and 8-bit values from the low bits of register +{\em rs2} to memory. + +For best performance, the effective address for all loads and stores +should be naturally aligned for each data type (i.e., on a four-byte +boundary for 32-bit accesses, and a two-byte boundary for 16-bit +accesses). The base ISA supports misaligned accesses, but these might +run extremely slowly depending on the implementation. Furthermore, +naturally aligned loads and stores are guaranteed to execute +atomically, whereas misaligned loads and stores might not, and hence +require additional synchronization to ensure atomicity. + +\begin{commentary} +Misaligned accesses are occasionally required when porting legacy +code, and are essential for good performance on many applications when +using any form of packed-SIMD extension. Our rationale for supporting +misaligned accesses via the regular load and store instructions is to +simplify the addition of misaligned hardware support. One option +would have been to disallow misaligned accesses in the base ISA and +then provide some separate ISA support for misaligned accesses, either +special instructions to help software handle misaligned accesses or a +new hardware addressing mode for misaligned accesses.
Special +instructions are difficult to use, complicate the ISA, and often add +new processor state (e.g., SPARC VIS align address offset register) or +complicate access to existing processor state (e.g., MIPS LWL/LWR +partial register writes). In addition, for loop-oriented packed-SIMD +code, the extra overhead when operands are misaligned motivates +software to provide multiple forms of loop depending on operand +alignment, which complicates code generation and adds to loop startup +overhead. New misaligned hardware addressing modes take considerable +space in the instruction encoding or require very simplified +addressing modes (e.g., register indirect only). + +We do not mandate atomicity for misaligned accesses so simple +implementations can just use a machine trap and software handler to +handle some or all misaligned accesses. If hardware misaligned support is +provided, software can exploit this by simply using regular load and +store instructions. Hardware can then automatically optimize accesses +depending on whether runtime addresses are aligned. +\end{commentary} + +\section{Memory Model} + +The base RISC-V ISA supports multiple concurrent threads of execution +within a single user address space. Each RISC-V thread has its own +user register state and program counter, and executes an independent +sequential instruction stream. The execution environment will define +how RISC-V threads are created and managed. RISC-V threads can +communicate and synchronize with other threads either via calls to the +execution environment, which are documented separately in the +specification for each execution environment, or directly via the +shared memory system. RISC-V threads can also interact with I/O +devices, and indirectly with each other, via loads and stores to +portions of the address space assigned to I/O. + +In the base RISC-V ISA, each RISC-V thread observes its own memory +operations as if they executed sequentially in program order. 
RISC-V +has a relaxed memory model between threads, requiring an explicit +FENCE instruction to guarantee any specific ordering between memory +operations from different RISC-V threads. Chapter~\ref{atomics} +describes the optional atomic memory instruction extensions ``A'', +which provide additional synchronization operations. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{F@{}IIIIIIIIF@{}F@{}F@{}S} +\\ +\instbitrange{31}{28} & +\multicolumn{1}{c}{\instbit{27}} & +\multicolumn{1}{c}{\instbit{26}} & +\multicolumn{1}{c}{\instbit{25}} & +\multicolumn{1}{c}{\instbit{24}} & +\multicolumn{1}{c}{\instbit{23}} & +\multicolumn{1}{c}{\instbit{22}} & +\multicolumn{1}{c}{\instbit{21}} & +\multicolumn{1}{c}{\instbit{20}} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{0} & +\multicolumn{1}{c|}{PI} & +\multicolumn{1}{c|}{PO} & +\multicolumn{1}{c|}{PR} & +\multicolumn{1}{c|}{PW} & +\multicolumn{1}{|c|}{SI} & +\multicolumn{1}{c|}{SO} & +\multicolumn{1}{c|}{SR} & +\multicolumn{1}{c|}{SW} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +4 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 5 & 3 & 5 & 7 \\ +0 & \multicolumn{4}{c}{predecessor} & \multicolumn{4}{c}{successor} & 0 & FENCE & 0 & MISC-MEM \\ +\end{tabular} +\end{center} + +The FENCE instruction is used to order device I/O and +memory accesses as viewed by other RISC-V threads and external devices +or coprocessors. Any combination of device input (I), device output +(O), memory reads (R), and memory writes (W) may be ordered with +respect to any combination of the same. Informally, no other RISC-V +thread or external device can observe any operation in the {\em + successor} set following a FENCE before any operation in the {\em + predecessor} set preceding the FENCE. 
The execution environment +will define what I/O operations are possible, and in particular, which +load and store instructions might be treated and ordered as device +input and device output operations respectively rather than memory +reads and writes. For example, memory-mapped I/O devices will +typically be accessed with uncached loads and stores that are ordered +using the I and O bits rather than the R and W bits. Instruction-set +extensions might also describe new coprocessor I/O instructions that +will also be ordered using the I and O bits in a FENCE. + +The unused fields in the FENCE instruction, {\em imm[11:8]}, {\em rs1}, and +{\em rd}, are reserved for finer-grain fences in future extensions. For +forward compatibility, base implementations shall ignore these fields, and +standard software shall zero these fields. + +\begin{commentary} +We chose a relaxed memory model to allow high performance from simple machine +implementations. A relaxed memory model is also most compatible with likely +future coprocessor or accelerator extensions. We separate out I/O ordering +from memory R/W ordering to avoid unnecessary serialization within +a device-driver thread and also to support alternative non-memory paths to +control added coprocessors or I/O devices. Simple implementations may +additionally ignore the {\em predecessor} and {\em successor} fields and +always execute a conservative fence on all operations. +\end{commentary} + +\vspace{-0.4in} +\begin{center} +\begin{tabular}{M@{}R@{}S@{}R@{}O} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +0 & 0 & FENCE.I & 0 & MISC-MEM \\ +\end{tabular} +\end{center} + +The FENCE.I instruction is used to synchronize the instruction and +data streams. 
RISC-V does not guarantee that stores to instruction +memory will be made visible to instruction fetches on the same RISC-V +thread until a FENCE.I instruction is executed. A FENCE.I instruction +only ensures that a subsequent instruction fetch on a RISC-V thread +will see any previous data stores already visible to the same RISC-V +thread. FENCE.I does {\em not} ensure that other RISC-V threads' +instruction fetches will observe the local thread's stores in a +multiprocessor system. To make a store to instruction memory visible +to all RISC-V threads, the writing thread has to execute a data FENCE +before requesting that all remote RISC-V threads execute a FENCE.I. + +The unused fields in the FENCE.I instruction, {\em imm[11:0]}, {\em rs1}, and +{\em rd}, are reserved for finer-grain fences in future extensions. For +forward compatibility, base implementations shall ignore these fields, and +standard software shall zero these fields. + +\begin{commentary} +The FENCE.I instruction was designed to support a wide variety of +implementations. A simple implementation can flush the local +instruction cache and the instruction pipeline when the FENCE.I is +executed. A more complex implementation might snoop the instruction +(data) cache on every data (instruction) cache miss, or use an +inclusive unified private L2 cache to invalidate lines from the +primary instruction cache when they are being written by a local store +instruction. If instruction and data caches are kept coherent in this +way, then only the pipeline needs to be flushed at a FENCE.I. + +We considered but did not include a ``store instruction word'' +instruction (as in MAJC~\cite{majc}). JIT compilers may generate a +large trace of instructions before a single FENCE.I, and amortize any +instruction cache snooping/invalidation overhead by writing translated +instructions to memory regions that are known not to reside in the +I-cache. 
+\end{commentary} + +\section{Control and Status Register Instructions} + +SYSTEM instructions are used to access system functionality that might +require privileged access and are encoded using the I-type instruction +format. These can be divided into two main classes: those that +atomically read-modify-write control and status registers (CSRs), and +all other potentially privileged instructions. CSR instructions are +described in this section, with the two other user-level SYSTEM +instructions described in the following section. + +\begin{commentary} +The SYSTEM instructions are defined to allow simpler implementations +to always trap to a single software trap handler. More sophisticated +implementations might execute more of each system instruction in +hardware. +\end{commentary} + +\subsubsection*{CSR Instructions} + +We define the full set of CSR instructions here, although in the standard +user-level base ISA, only a handful of read-only counter CSRs are accessible. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{M@{}R@{}F@{}R@{}S} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{csr} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +source/dest & source & CSRRW & dest & SYSTEM \\ +source/dest & source & CSRRS & dest & SYSTEM \\ +source/dest & source & CSRRC & dest & SYSTEM \\ +source/dest & zimm[4:0] & CSRRWI & dest & SYSTEM \\ +source/dest & zimm[4:0] & CSRRSI & dest & SYSTEM \\ +source/dest & zimm[4:0] & CSRRCI & dest & SYSTEM \\ +\end{tabular} +\end{center} + +The CSRRW (Atomic Read/Write CSR) instruction atomically swaps values +in the CSRs and integer registers. CSRRW reads the old value of the +CSR, zero-extends the value to XLEN bits, then writes it to integer +register {\em rd}. The initial value in {\em rs1} is written to the +CSR. 
If {\em rd}={\tt x0}, then the instruction shall not read the CSR +and shall not cause any of the side-effects that might occur on a CSR +read. + +The CSRRS (Atomic Read and Set Bits in CSR) instruction reads the +value of the CSR, zero-extends the value to XLEN bits, and writes it +to integer register {\em rd}. The initial value in integer register +{\em rs1} is treated as a bit mask that specifies bit positions to be +set in the CSR. Any bit that is high in {\em rs1} will cause the +corresponding bit to be set in the CSR, if that CSR bit is writable. +Other bits in the CSR are unaffected (though CSRs might have side +effects when written). + +The CSRRC (Atomic Read and Clear Bits in CSR) instruction reads the +value of the CSR, zero-extends the value to XLEN bits, and writes it +to integer register {\em rd}. The initial value in integer register +{\em rs1} is treated as a bit mask that specifies bit positions to be +cleared in the CSR. Any bit that is high in {\em rs1} will cause the +corresponding bit to be cleared in the CSR, if that CSR bit is +writable. Other bits in the CSR are unaffected. + +For both CSRRS and CSRRC, if {\em rs1}={\tt x0}, then the instruction +will not write to the CSR at all, and so shall not cause any of the +side effects that might otherwise occur on a CSR write, such as +raising illegal instruction exceptions on accesses to read-only CSRs. +Note that if {\em rs1} specifies a register holding a zero value other +than {\tt x0}, the instruction will still attempt to write the +unmodified value back to the CSR and will cause any attendant side effects. + +The CSRRWI, CSRRSI, and CSRRCI variants are similar to CSRRW, CSRRS, +and CSRRC respectively, except they update the CSR using an XLEN-bit +value obtained by zero-extending a 5-bit immediate (zimm[4:0]) field +encoded in the {\em rs1} field instead of a value from an integer +register. 
For CSRRSI and CSRRCI, if the zimm[4:0] field is zero, then +these instructions will not write to the CSR, and shall not cause any +of the side effects that might otherwise occur on a CSR write. For +CSRRWI, if {\em rd}={\tt x0}, then the instruction shall not read the +CSR and shall not cause any of the side-effects that might occur on a +CSR read. + +Some CSRs, such as the instructions retired counter, {\tt instret}, may be +modified as side effects of instruction execution. In these cases, if a CSR +access instruction reads a CSR, it reads the value prior to the execution of +the instruction. If a CSR access instruction writes a CSR, the update occurs +after the execution of the instruction. In particular, a value written to +{\tt instret} by one instruction will be the value read by the following +instruction (i.e., the increment of {\tt instret} caused by the first +instruction retiring happens before the write of the new value). + +The assembler pseudo-instruction to read a CSR, CSRR {\em rd, csr}, is +encoded as CSRRS {\em rd, csr, x0}. The assembler pseudo-instruction +to write a CSR, CSRW {\em csr, rs1}, is encoded as CSRRW {\em x0, csr, + rs1}, while CSRWI {\em csr, zimm}, is encoded as CSRRWI {\em x0, + csr, zimm}. + +Further assembler pseudo-instructions are defined to set and clear +bits in the CSR when the old value is not required: CSRS/CSRC {\em + csr, rs1}; CSRSI/CSRCI {\em csr, zimm}. 
+ +\subsubsection*{Timers and Counters} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{M@{}R@{}F@{}R@{}S} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{csr} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +RDCYCLE[H] & 0 & CSRRS & dest & SYSTEM \\ +RDTIME[H] & 0 & CSRRS & dest & SYSTEM \\ +RDINSTRET[H] & 0 & CSRRS & dest & SYSTEM \\ +\end{tabular} +\end{center} + +RV32I provides a number of 64-bit read-only user-level counters, which +are mapped into the 12-bit CSR address space and accessed in 32-bit +pieces using CSRRS instructions. + +The RDCYCLE pseudo-instruction reads the low XLEN bits of the {\tt + cycle} CSR, which holds a count of the number of clock cycles +executed by the processor on which the hardware thread is running from +an arbitrary start time in the past. RDCYCLEH is +an RV32I-only instruction that reads bits 63--32 of the same cycle +counter. The underlying 64-bit counter should never overflow in +practice. The rate at which the cycle counter advances will depend on +the implementation and operating environment. The execution +environment should provide a means to determine the current rate +(cycles/second) at which the cycle counter is incrementing. + +The RDTIME pseudo-instruction reads the low XLEN bits of the {\tt + time} CSR, which counts wall-clock real time that has passed from an +arbitrary start time in the past. RDTIMEH is an RV32I-only instruction +that reads bits 63--32 of the same real-time counter. The underlying 64-bit +counter should never overflow in practice. The execution environment +should provide a means of determining the period of the real-time +counter (seconds/tick). The period must be constant.
The +real-time clocks of all hardware threads in a single user application +should be synchronized to within one tick of the real-time clock. The +environment should provide a means to determine the accuracy of the +clock. + +The RDINSTRET pseudo-instruction reads the low XLEN bits of the {\tt + instret} CSR, which counts the number of instructions retired by +this hardware thread from some arbitrary start point in the past. +RDINSTRETH is an RV32I-only instruction that reads bits 63--32 of the +same instruction counter. The underlying 64-bit counter should +never overflow in practice. + +The following code sequence will read a valid 64-bit cycle counter value into +{\tt x3}:{\tt x2}, even if the counter overflows between reading its upper +and lower halves. + +\begin{figure}[h!] +\begin{center} +\begin{verbatim} + again: + rdcycleh x3 + rdcycle x2 + rdcycleh x4 + bne x3, x4, again +\end{verbatim} +\end{center} +\caption{Sample code for reading the 64-bit cycle counter in RV32.} +\label{critical} +\end{figure} + +\begin{commentary} +We mandate these basic counters be provided in all implementations as +they are essential for basic performance analysis, adaptive and +dynamic optimization, and to allow an application to work with +real-time streams. Additional counters should be provided to help +diagnose performance problems and these should be made accessible from +user-level application code with low overhead. + +We required the counters be 64 bits wide, even on RV32, as otherwise +it is very difficult for software to determine if values have +overflowed. For a low-end implementation, the upper 32 bits of each +counter can be implemented using software counters incremented by a +trap handler triggered by overflow of the lower 32 bits. The sample +code described above shows how the full 64-bit width value can be +safely read using the individual 32-bit instructions.
+ +In some applications, it is important to be able to read multiple +counters at the same instant in time. When run under a multitasking +environment, a user thread can suffer a context switch while +attempting to read the counters. One solution is for the user thread +to read the real-time counter before and after reading the other +counters to determine if a context switch occurred in the middle of the +sequence, in which case the reads can be retried. We considered +adding output latches to allow a user thread to snapshot the counter +values atomically, but this would increase the size of the user +context, especially for implementations with a richer set of counters. +\end{commentary} + + +\section{Environment Call and Breakpoints} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{M@{}R@{}F@{}R@{}S} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct12} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +ECALL & 0 & PRIV & 0 & SYSTEM \\ +EBREAK & 0 & PRIV & 0 & SYSTEM \\ +\end{tabular} +\end{center} + +The ECALL instruction is used to make a request to the supporting +execution environment, which is usually an operating system. The ABI +for the system will define how parameters for the environment request +are passed, but usually these will be in defined locations in the +integer register file. + +The EBREAK instruction is used by debuggers to cause control to be +transferred back to a debugging environment. + +\begin{commentary} +ECALL and EBREAK were previously named SCALL and SBREAK. The +instructions have the same functionality and encoding, but were +renamed to reflect that they can be used more generally than to call a +supervisor-level operating system or debugger. 
+\end{commentary} + diff --git a/src/rv32e.tex b/src/rv32e.tex new file mode 100644 index 0000000..9c63dbc --- /dev/null +++ b/src/rv32e.tex @@ -0,0 +1,84 @@ +\chapter{RV32E Base Integer Instruction Set, Version 1.9} +\label{rv32e} + +This chapter describes the RV32E base integer instruction set, which +is a reduced version of RV32I designed for embedded systems. The main +change is to reduce the number of integer registers to 16, and to +remove the counters that are mandatory in RV32I. This chapter only +outlines the differences between RV32E and RV32I, and so should be +read after Chapter~\ref{rv32}. + +\begin{commentary} +RV32E was designed to provide an even smaller base core for embedded +microcontrollers. Although we had mentioned this possibility in +version 2.0 of this document, we initially resisted defining this +subset. However, given the demand for the smallest possible 32-bit +microcontroller, and in the interests of preempting fragmentation in +this space, we have now defined RV32E as a fourth standard base ISA in +addition to RV32I, RV64I, and RV128I. The E variant is only +standardized for the 32-bit address space width. +\end{commentary} + +\section{RV32E Programmers' Model} + +RV32E reduces the integer register count to 16 general-purpose +registers, ({\tt x0}--{\tt x15}), where {\tt x0} is a dedicated zero +register. + +\begin{commentary} +We have found that in the small RV32I core designs, the upper 16 +registers consume around one quarter of the total area of the core +excluding memories, thus their removal saves around 25\% core area +with a corresponding core power reduction. +\end{commentary} + +\begin{commentary} +This change requires a different calling convention and ABI. In +particular, RV32E is only used with a soft-float calling convention. +Systems with hardware floating-point must use an I base. 
+\end{commentary} + +\section{RV32E Instruction Set} + +RV32E uses the same instruction set encoding as RV32I, except that use +of register specifiers {\tt x16}--{\tt x31} in an instruction will +result in an illegal instruction exception being raised. + +\begin{commentary} +Any future standard extensions will not make use of the instruction +bits freed up by the reduced register-specifier fields and so these +are available for non-standard extensions. +\end{commentary} + +A further simplification is that the counter instructions ({\tt + rdcycle[h]}, {\tt rdtime[h]}, {\tt rdinstret[h]}) are no longer +mandatory. + +\begin{commentary} +The mandatory counters require additional registers and logic, and can +be replaced with more application-specific facilities. +\end{commentary} + +\section{RV32E Extensions} + +RV32E can be extended with the M, A, and C user-level standard extensions. + +\begin{commentary} +We do not intend to support hardware floating-point with the RV32E +subset. The savings from reduced register count become negligible in +the context of a hardware floating-point unit, and we wish to reduce +the proliferation of ABIs. +\end{commentary} + +The privileged architecture of an RV32E system can include user mode +as well as machine mode, and the Mbare, Mbb, and Mbbid memory +management schemes described in Volume II. + +\begin{commentary} +We do not intend to support full Unix-style operating systems with the +RV32E subset. The savings from reduced register count become +negligible in the context of an OS-capable core, and we wish to avoid +OS fragmentation. +\end{commentary} + + diff --git a/src/rv64.tex b/src/rv64.tex new file mode 100644 index 0000000..5de9ea2 --- /dev/null +++ b/src/rv64.tex @@ -0,0 +1,253 @@ +\chapter{RV64I Base Integer Instruction Set, Version 2.0} +\label{rv64} + +This chapter describes the RV64I base integer instruction set, which +builds upon the RV32I variant described in Chapter~\ref{rv32}.
This +chapter presents only the differences with RV32I, so should be read in +conjunction with the earlier chapter. + +\section{Register State} + +RV64I widens the integer registers and supported user address space to +64 bits (XLEN=64 in Figure~\ref{gprs}). + +\section{Integer Computational Instructions} + +Additional instruction variants are provided to manipulate 32-bit +values in RV64I, indicated by a `W' suffix to the opcode. These +``*W'' instructions ignore the upper 32 bits of their inputs and +always produce 32-bit signed values, i.e. bits XLEN-1 through 31 are +equal. They cause an illegal instruction exception in RV32I. + +\begin{commentary} +The compiler and calling convention maintain an invariant that all 32-bit +values are held in a sign-extended format in 64-bit registers. Even 32-bit +unsigned integers extend bit 31 into bits 63 through 32. Consequently, +conversion between unsigned and signed 32-bit integers is a no-op, +as is conversion from a signed 32-bit integer to a signed 64-bit +integer. Existing 64-bit wide SLTU and unsigned branch compares still operate +correctly on unsigned 32-bit integers under this invariant. Similarly, +existing 64-bit wide logical operations on 32-bit sign-extended integers +preserve the sign-extension property. A few new instructions +(ADD[I]W/SUBW/SxxW) are required for addition and shifts to ensure reasonable +performance for 32-bit values. 
+\end{commentary} + +\newpage +\subsubsection*{Integer Register-Immediate Instructions} +\vspace{-0.4in} +\begin{center} +\begin{tabular}{M@{}R@{}S@{}R@{}O} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +I-immediate[11:0] & src & ADDIW & dest & OP-IMM-32 \\ +\end{tabular} +\end{center} + +ADDIW is an RV64I-only instruction that adds the sign-extended 12-bit +immediate to register {\em rs1} and produces the proper sign-extension +of a 32-bit result in {\em rd}. Overflows are ignored and the result +is the low 32 bits of the result sign-extended to 64 bits. Note, +ADDIW {\em rd, rs1, 0} writes the sign-extension of the lower 32 bits +of register {\em rs1} into register {\em rd} (assembler pseudo-op +SEXT.W). + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}W@{}R@{}R@{}R@{}R@{}O} +\\ +\instbitrange{31}{26} & +\multicolumn{1}{c}{\instbit{25}} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:6]} & +\multicolumn{1}{|c|}{imm[5]} & +\multicolumn{1}{|c|}{imm[4:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +6 & \multicolumn{1}{c}{1} & 5 & 5 & 3 & 5 & 7 \\ +000000 & shamt[5] & shamt[4:0] & src & SLLI & dest & OP-IMM \\ +000000 & shamt[5] & shamt[4:0] & src & SRLI & dest & OP-IMM \\ +010000 & shamt[5] & shamt[4:0] & src & SRAI & dest & OP-IMM \\ +000000 & 0 & shamt[4:0] & src & SLLIW & dest & OP-IMM-32 \\ +000000 & 0 & shamt[4:0] & src & SRLIW & dest & OP-IMM-32 \\ +010000 & 0 & shamt[4:0] & src & SRAIW & dest & OP-IMM-32 \\ +\end{tabular} +\end{center} + +Shifts by a constant are encoded as a 
specialization of the I-type +format using the same instruction opcode as RV32I. The operand to be +shifted is in {\em rs1}, and the shift amount is encoded in the lower +6 bits of the I-immediate field for RV64I. The right shift type is +encoded in bit 30. SLLI is a logical left shift (zeros are shifted +into the lower bits); SRLI is a logical right shift (zeros are shifted +into the upper bits); and SRAI is an arithmetic right shift (the +original sign bit is copied into the vacated upper bits). For RV32I, +SLLI, SRLI, and SRAI generate an illegal instruction exception if +$imm[5] \neq 0$. + +SLLIW, SRLIW, and SRAIW are RV64I-only instructions that are +analogously defined but operate on 32-bit values and produce +signed 32-bit results. SLLIW, SRLIW, and SRAIW generate an illegal +instruction exception if $imm[5] \neq 0$. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{U@{}R@{}O} +\\ +\instbitrange{31}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[31:12]} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +20 & 5 & 7 \\ +U-immediate[31:12] & dest & LUI \\ +U-immediate[31:12] & dest & AUIPC +\end{tabular} +\end{center} + + +LUI (load upper immediate) uses the same opcode as RV32I. LUI places +the 20-bit U-immediate into bits 31--12 of register {\em rd} and +places zero in the lowest 12 bits. The 32-bit result is +sign-extended to 64 bits. + +AUIPC (add upper immediate to {\tt pc}) uses the same opcode as RV32I. +AUIPC is used to build {\tt + pc}-relative addresses and uses the U-type format. AUIPC appends 12 +low-order zero bits to the 20-bit U-immediate, sign-extends the result +to 64 bits, then adds it to the {\tt pc} and places the result in +register {\em rd}.
+ +\subsubsection*{Integer Register-Register Operations} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{S@{}R@{}R@{}S@{}R@{}O} +\\ +\instbitrange{31}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct7} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +7 & 5 & 5 & 3 & 5 & 7 \\ +0000000 & src2 & src1 & SLL/SRL & dest & OP \\ +0100000 & src2 & src1 & SRA & dest & OP \\ +0000000 & src2 & src1 & ADDW & dest & OP-32 \\ +0000000 & src2 & src1 & SLLW/SRLW & dest & OP-32 \\ +0100000 & src2 & src1 & SUBW/SRAW & dest & OP-32 \\ +\end{tabular} +\end{center} + +ADDW and SUBW are RV64I-only instructions that are defined analogously +to ADD and SUB but operate on 32-bit values and produce signed 32-bit +results. Overflows are ignored, and the low 32 bits of the result are +sign-extended to 64 bits and written to the destination register. + +SLL, SRL, and SRA perform logical left, logical right, and arithmetic +right shifts on the value in register {\em rs1} by the shift amount +held in register {\em rs2}. In RV64I, only the low 6 bits of {\em + rs2} are considered for the shift amount. + +SLLW, SRLW, and SRAW are RV64I-only instructions that are analogously +defined but operate on 32-bit values and produce signed 32-bit +results. The shift amount is given by {\em rs2[4:0]}. + +\section{Load and Store Instructions} + +RV64I extends the address space to 64 bits. The execution environment +will define what portions of the address space are legal to access.
+ +\vspace{-0.4in} +\begin{center} +\begin{tabular}{M@{}R@{}S@{}R@{}O} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +offset[11:0] & base & width & dest & LOAD \\ +\end{tabular} +\end{center} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{O@{}R@{}R@{}S@{}R@{}O} +\\ +\instbitrange{31}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:5]} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{opcode} \\ +\hline +7 & 5 & 5 & 3 & 5 & 7 \\ +offset[11:5] & src & base & width & offset[4:0] & STORE \\ +\end{tabular} +\end{center} + +The LD instruction loads a 64-bit value from memory into register {\em + rd} for RV64I. + +The LW instruction loads a 32-bit value from memory and sign-extends +this to 64 bits before storing it in register {\em rd} for RV64I. The +LWU instruction, on the other hand, zero-extends the 32-bit value from +memory for RV64I. LH and LHU are defined analogously for 16-bit +values, as are LB and LBU for 8-bit values. The SD, SW, SH, and SB +instructions store 64-bit, 32-bit, 16-bit, and 8-bit values from the +low bits of register {\em rs2} to memory respectively. + +\section{System Instructions} + +In RV64I, the CSR instructions can manipulate 64-bit CSRs. In particular, the +RDCYCLE, RDTIME, and RDINSTRET pseudo-instructions read the full 64 bits of +the {\tt cycle}, {\tt time}, and {\tt instret} counters. Hence, the RDCYCLEH, +RDTIMEH, and RDINSTRETH instructions are not necessary and are illegal in +RV64I. 
diff --git a/src/rvc-instr-table.tex b/src/rvc-instr-table.tex new file mode 100644 index 0000000..b4bc8c6 --- /dev/null +++ b/src/rvc-instr-table.tex @@ -0,0 +1,537 @@ + +\begin{table}[h] +\begin{small} +\begin{center} +\begin{tabular}{p{0in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}l} +& & & & & & & & & & \\ + & +\instbit{15} & +\instbit{14} & +\instbit{13} & +\multicolumn{1}{c}{\instbit{12}} & +\instbit{11} & +\instbit{10} & +\instbit{9} & +\instbit{8} & +\instbit{7} & +\instbit{6} & +\multicolumn{1}{c}{\instbit{5}} & +\instbit{4} & +\instbit{3} & +\instbit{2} & +\instbit{1} & +\instbit{0} \\ +\cline{2-17} + + +& +\multicolumn{3}{|c|}{000} & +\multicolumn{8}{c|}{0} & +\multicolumn{3}{c|}{0} & +\multicolumn{2}{c|}{00} & {\em Illegal instruction} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{000} & +\multicolumn{8}{c|}{nzimm[5:4$\vert$9:6$\vert$2$\vert$3]} & +\multicolumn{3}{c|}{rd$'$} & +\multicolumn{2}{c|}{00} & C.ADDI4SPN {\em \tiny (RES, nzimm=0)} \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{001} & +\multicolumn{3}{c|}{imm[5:3]} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{2}{c|}{imm[7:6]} & +\multicolumn{3}{c|}{rd$'$} & +\multicolumn{2}{c|}{00} & C.FLD {\em \tiny (RV32/64)}\\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{001} & +\multicolumn{3}{c|}{imm[5:4$\vert$8]} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{2}{c|}{imm[7:6]} & +\multicolumn{3}{c|}{rd$'$} & +\multicolumn{2}{c|}{00} & C.LQ {\em \tiny (RV128)}\\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{010} & +\multicolumn{3}{c|}{imm[5:3]} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{2}{c|}{imm[2$\vert$6]} & +\multicolumn{3}{c|}{rd$'$} & +\multicolumn{2}{c|}{00} & C.LW \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{011} & +\multicolumn{3}{c|}{imm[5:3]} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{2}{c|}{imm[2$\vert$6]} & +\multicolumn{3}{c|}{rd$'$} & +\multicolumn{2}{c|}{00} & C.FLW {\em \tiny (RV32)} \\ 
+\cline{2-17} + +& +\multicolumn{3}{|c|}{011} & +\multicolumn{3}{c|}{imm[5:3]} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{2}{c|}{imm[7:6]} & +\multicolumn{3}{c|}{rd$'$} & +\multicolumn{2}{c|}{00} & C.LD {\em \tiny (RV64/128)}\\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{11}{c|}{---} & +\multicolumn{2}{c|}{00} & {\em Reserved} \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{101} & +\multicolumn{3}{c|}{imm[5:3]} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{2}{c|}{imm[7:6]} & +\multicolumn{3}{c|}{rs2$'$} & +\multicolumn{2}{c|}{00} & C.FSD {\em \tiny (RV32/64)}\\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{101} & +\multicolumn{3}{c|}{imm[5:4$\vert$8]} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{2}{c|}{imm[7:6]} & +\multicolumn{3}{c|}{rs2$'$} & +\multicolumn{2}{c|}{00} & C.SQ {\em \tiny (RV128)}\\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{110} & +\multicolumn{3}{c|}{imm[5:3]} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{2}{c|}{imm[2$\vert$6]} & +\multicolumn{3}{c|}{rs2$'$} & +\multicolumn{2}{c|}{00} & C.SW \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{111} & +\multicolumn{3}{c|}{imm[5:3]} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{2}{c|}{imm[2$\vert$6]} & +\multicolumn{3}{c|}{rs2$'$} & +\multicolumn{2}{c|}{00} & C.FSW {\em \tiny (RV32)} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{111} & +\multicolumn{3}{c|}{imm[5:3]} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{2}{c|}{imm[7:6]} & +\multicolumn{3}{c|}{rs2$'$} & +\multicolumn{2}{c|}{00} & C.SD {\em \tiny (RV64/128)}\\ +\cline{2-17} + +\end{tabular} +\end{center} +\end{small} +\caption{Instruction listing for RVC, Quadrant 0.} +\label{rvc-instr-table0} +\end{table} + +\begin{table}[h] +\begin{small} +\begin{center} +\begin{tabular}{p{0in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}l} +& & & & & & & & & & \\ + & +\instbit{15} & +\instbit{14} & +\instbit{13} & +\multicolumn{1}{c}{\instbit{12}} & 
+\instbit{11} & +\instbit{10} & +\instbit{9} & +\instbit{8} & +\instbit{7} & +\instbit{6} & +\multicolumn{1}{c}{\instbit{5}} & +\instbit{4} & +\instbit{3} & +\instbit{2} & +\instbit{1} & +\instbit{0} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{000} & +\multicolumn{1}{c|}{0} & +\multicolumn{5}{c|}{0} & +\multicolumn{5}{c|}{0} & +\multicolumn{2}{c|}{01} & C.NOP \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{000} & +\multicolumn{1}{c|}{nzimm[5]} & +\multicolumn{5}{c|}{rs1/rd$\neq$0} & +\multicolumn{5}{c|}{nzimm[4:0]} & +\multicolumn{2}{c|}{01} & C.ADDI {\em \tiny (HINT, nzimm=0)} \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{001} & +\multicolumn{11}{c|}{offset[11$\vert$4$\vert$9:8$\vert$10$\vert$6$\vert$7$\vert$3:1$\vert$5]} & +\multicolumn{2}{c|}{01} & C.JAL {\em \tiny (RV32)} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{001} & +\multicolumn{1}{c|}{imm[5]} & +\multicolumn{5}{c|}{rs1/rd$\neq$0} & +\multicolumn{5}{c|}{imm[4:0]} & +\multicolumn{2}{c|}{01} & C.ADDIW {\em \tiny (RV64/128; RES, rd=0)} \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{010} & +\multicolumn{1}{c|}{imm[5]} & +\multicolumn{5}{c|}{rs1/rd$\neq$0} & +\multicolumn{5}{c|}{imm[4:0]} & +\multicolumn{2}{c|}{01} & C.LI {\em \tiny (HINT, rd=0)} \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{011} & +\multicolumn{1}{c|}{nzimm[9]} & +\multicolumn{5}{c|}{2} & +\multicolumn{5}{c|}{nzimm[4$\vert$6$\vert$8:7$\vert$5]} & +\multicolumn{2}{c|}{01} & C.ADDI16SP {\em \tiny (RES, nzimm=0)} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{011} & +\multicolumn{1}{c|}{nzimm[17]} & +\multicolumn{5}{c|}{rs1/rd$\neq$$\{0,2\}$} & +\multicolumn{5}{c|}{nzimm[16:12]} & +\multicolumn{2}{c|}{01} & C.LUI {\em \tiny (RES, nzimm=0; HINT, rd=0)}\\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{nzimm[5]} & +\multicolumn{2}{c|}{00} & +\multicolumn{3}{c|}{rs1$'$/rd$'$} & +\multicolumn{5}{c|}{nzimm[4:0]} & +\multicolumn{2}{c|}{01} & C.SRLI {\em \tiny (RV32 NSE, nzimm[5]=1)} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & 
+\multicolumn{1}{c|}{0} & +\multicolumn{2}{c|}{00} & +\multicolumn{3}{c|}{rs1$'$/rd$'$} & +\multicolumn{5}{c|}{0} & +\multicolumn{2}{c|}{01} & C.SRLI64 {\em \tiny (RV128; RV32/64 HINT)} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{nzimm[5]} & +\multicolumn{2}{c|}{01} & +\multicolumn{3}{c|}{rs1$'$/rd$'$} & +\multicolumn{5}{c|}{nzimm[4:0]} & +\multicolumn{2}{c|}{01} & C.SRAI {\em \tiny (RV32 NSE, nzimm[5]=1)} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{0} & +\multicolumn{2}{c|}{01} & +\multicolumn{3}{c|}{rs1$'$/rd$'$} & +\multicolumn{5}{c|}{0} & +\multicolumn{2}{c|}{01} & C.SRAI64 {\em \tiny (RV128; RV32/64 HINT)} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{imm[5]} & +\multicolumn{2}{c|}{10} & +\multicolumn{3}{c|}{rs1$'$/rd$'$} & +\multicolumn{5}{c|}{imm[4:0]} & +\multicolumn{2}{c|}{01} & C.ANDI \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{0} & +\multicolumn{2}{c|}{11} & +\multicolumn{3}{c|}{rs1$'$/rd$'$} & +\multicolumn{2}{c|}{00} & +\multicolumn{3}{c|}{rs2$'$} & +\multicolumn{2}{c|}{01} & C.SUB \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{0} & +\multicolumn{2}{c|}{11} & +\multicolumn{3}{c|}{rs1$'$/rd$'$} & +\multicolumn{2}{c|}{01} & +\multicolumn{3}{c|}{rs2$'$} & +\multicolumn{2}{c|}{01} & C.XOR \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{0} & +\multicolumn{2}{c|}{11} & +\multicolumn{3}{c|}{rs1$'$/rd$'$} & +\multicolumn{2}{c|}{10} & +\multicolumn{3}{c|}{rs2$'$} & +\multicolumn{2}{c|}{01} & C.OR \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{0} & +\multicolumn{2}{c|}{11} & +\multicolumn{3}{c|}{rs1$'$/rd$'$} & +\multicolumn{2}{c|}{11} & +\multicolumn{3}{c|}{rs2$'$} & +\multicolumn{2}{c|}{01} & C.AND \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{1} & +\multicolumn{2}{c|}{11} & +\multicolumn{3}{c|}{rs1$'$/rd$'$} & +\multicolumn{2}{c|}{00} & 
+\multicolumn{3}{c|}{rs2$'$} & +\multicolumn{2}{c|}{01} & C.SUBW {\em \tiny (RV64/128; RV32 RES)} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{1} & +\multicolumn{2}{c|}{11} & +\multicolumn{3}{c|}{rs1$'$/rd$'$} & +\multicolumn{2}{c|}{01} & +\multicolumn{3}{c|}{rs2$'$} & +\multicolumn{2}{c|}{01} & C.ADDW {\em \tiny (RV64/128; RV32 RES)} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{1} & +\multicolumn{2}{c|}{11} & +\multicolumn{3}{c|}{---} & +\multicolumn{2}{c|}{10} & +\multicolumn{3}{c|}{---} & +\multicolumn{2}{c|}{01} & {\em Reserved} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{1} & +\multicolumn{2}{c|}{11} & +\multicolumn{3}{c|}{---} & +\multicolumn{2}{c|}{11} & +\multicolumn{3}{c|}{---} & +\multicolumn{2}{c|}{01} & {\em Reserved} \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{101} & +\multicolumn{11}{c|}{offset[11$\vert$4$\vert$9:8$\vert$10$\vert$6$\vert$7$\vert$3:1$\vert$5]} & +\multicolumn{2}{c|}{01} & C.J \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{110} & +\multicolumn{3}{c|}{offset[8$\vert$4:3]} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{5}{c|}{offset[7:6$\vert$2:1$\vert$5]} & +\multicolumn{2}{c|}{01} & C.BEQZ \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{111} & +\multicolumn{3}{c|}{offset[8$\vert$4:3]} & +\multicolumn{3}{c|}{rs1$'$} & +\multicolumn{5}{c|}{offset[7:6$\vert$2:1$\vert$5]} & +\multicolumn{2}{c|}{01} & C.BNEZ \\ +\cline{2-17} + + +\end{tabular} +\end{center} +\end{small} +\caption{Instruction listing for RVC, Quadrant 1.} +\label{rvc-instr-table1} +\end{table} + +\begin{table}[h] +\begin{small} +\begin{center} +\begin{tabular}{p{0in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}l} +& & & & & & & & & & \\ + & +\instbit{15} & +\instbit{14} & +\instbit{13} & +\multicolumn{1}{c}{\instbit{12}} & +\instbit{11} & +\instbit{10} & +\instbit{9} & +\instbit{8} & +\instbit{7} & 
+\instbit{6} & +\multicolumn{1}{c}{\instbit{5}} & +\instbit{4} & +\instbit{3} & +\instbit{2} & +\instbit{1} & +\instbit{0} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{000} & +\multicolumn{1}{c|}{nzimm[5]} & +\multicolumn{5}{c|}{rd$\neq$0} & +\multicolumn{5}{c|}{nzimm[4:0]} & +\multicolumn{2}{c|}{10} & C.SLLI {\em \tiny (HINT, rd=0; RV32 NSE, nzimm[5]=1)} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{000} & +\multicolumn{1}{c|}{0} & +\multicolumn{5}{c|}{rd$\neq$0} & +\multicolumn{5}{c|}{0} & +\multicolumn{2}{c|}{10} & C.SLLI64 {\em \tiny (RV128; RV32/64 HINT; HINT, rd=0)} \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{001} & +\multicolumn{1}{c|}{imm[5]} & +\multicolumn{5}{c|}{rd} & +\multicolumn{5}{c|}{imm[4:3$\vert$8:6]} & +\multicolumn{2}{c|}{10} & C.FLDSP {\em \tiny (RV32/64)} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{001} & +\multicolumn{1}{c|}{imm[5]} & +\multicolumn{5}{c|}{rd$\neq$0} & +\multicolumn{5}{c|}{imm[4$\vert$9:6]} & +\multicolumn{2}{c|}{10} & C.LQSP {\em \tiny (RV128; RES, rd=0)} \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{010} & +\multicolumn{1}{c|}{imm[5]} & +\multicolumn{5}{c|}{rd$\neq$0} & +\multicolumn{5}{c|}{imm[4:2$\vert$7:6]} & +\multicolumn{2}{c|}{10} & C.LWSP {\em \tiny (RES, rd=0)} \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{011} & +\multicolumn{1}{c|}{imm[5]} & +\multicolumn{5}{c|}{rd} & +\multicolumn{5}{c|}{imm[4:2$\vert$7:6]} & +\multicolumn{2}{c|}{10} & C.FLWSP {\em \tiny (RV32)} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{011} & +\multicolumn{1}{c|}{imm[5]} & +\multicolumn{5}{c|}{rd$\neq$0} & +\multicolumn{5}{c|}{imm[4:3$\vert$8:6]} & +\multicolumn{2}{c|}{10} & C.LDSP {\em \tiny (RV64/128; RES, rd=0)} \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{0} & +\multicolumn{5}{c|}{rs1$\neq$0} & +\multicolumn{5}{c|}{0} & +\multicolumn{2}{c|}{10} & C.JR {\em \tiny (RES, rs1=0)}\\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{0} & +\multicolumn{5}{c|}{rd$\neq$0} & +\multicolumn{5}{c|}{rs2$\neq$0} 
& +\multicolumn{2}{c|}{10} & C.MV {\em \tiny (HINT, rd=0)}\\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{1} & +\multicolumn{5}{c|}{0} & +\multicolumn{5}{c|}{0} & +\multicolumn{2}{c|}{10} & C.EBREAK \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{1} & +\multicolumn{5}{c|}{rs1$\neq$0} & +\multicolumn{5}{c|}{0} & +\multicolumn{2}{c|}{10} & C.JALR \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{100} & +\multicolumn{1}{c|}{1} & +\multicolumn{5}{c|}{rd$\neq$0} & +\multicolumn{5}{c|}{rs2$\neq$0} & +\multicolumn{2}{c|}{10} & C.ADD {\em \tiny (HINT, rd=0)} \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{101} & +\multicolumn{6}{c|}{imm[5:3$\vert$8:6]} & +\multicolumn{5}{c|}{rs2} & +\multicolumn{2}{c|}{10} & C.FSDSP {\em \tiny (RV32/64)}\\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{101} & +\multicolumn{6}{c|}{imm[5:4$\vert$9:6]} & +\multicolumn{5}{c|}{rs2} & +\multicolumn{2}{c|}{10} & C.SQSP {\em \tiny (RV128)}\\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{110} & +\multicolumn{6}{c|}{imm[5:2$\vert$7:6]} & +\multicolumn{5}{c|}{rs2} & +\multicolumn{2}{c|}{10} & C.SWSP \\ +\whline{2-17} + +& +\multicolumn{3}{|c|}{111} & +\multicolumn{6}{c|}{imm[5:2$\vert$7:6]} & +\multicolumn{5}{c|}{rs2} & +\multicolumn{2}{c|}{10} & C.FSWSP {\em \tiny (RV32)} \\ +\cline{2-17} + +& +\multicolumn{3}{|c|}{111} & +\multicolumn{6}{c|}{imm[5:3$\vert$8:6]} & +\multicolumn{5}{c|}{rs2} & +\multicolumn{2}{c|}{10} & C.SDSP {\em \tiny (RV64/128)}\\ +\cline{2-17} + +\end{tabular} +\end{center} +\end{small} +\caption{Instruction listing for RVC, Quadrant 2.} +\label{rvc-instr-table2} +\end{table} diff --git a/src/rvc-opcode-map.tex b/src/rvc-opcode-map.tex new file mode 100644 index 0000000..48eeb08 --- /dev/null +++ b/src/rvc-opcode-map.tex @@ -0,0 +1,27 @@ +\vspace{0.1in} +\definecolor{gray}{RGB}{180,180,180} +\begin{table*}[htbp] +\begin{center} +{\footnotesize +\setlength{\tabcolsep}{4pt} +\begin{tabular}{|r|c|c|c|c|c|c|c|c|l} + \cline{1-9} + inst[15:13] & 
\multirow{2}{*}{000}& \multirow{2}{*}{001}& \multirow{2}{*}{010}& \multirow{2}{*}{011}& \multirow{2}{*}{100}& \multirow{2}{*}{101}& \multirow{2}{*}{110}& \multirow{2}{*}{111}\\ \cline{1-1} + inst[1:0] & & & & & & & & \\ \cline{1-9} + \multirow{3}{*}{00} & \multirow{3}{*}{ADDI4SPN} & FLD & \multirow{3}{*}{LW} & FLW & \multirow{3}{*}{\em Reserved} & FSD & \multirow{3}{*}{SW} & FSW & RV32 \\ + & & FLD & & LD & & FSD & & SD & RV64 \\ + & & LQ & & LD & & SQ & & SD & RV128 \\ \hline + \multirow{3}{*}{01} & \multirow{3}{*}{ADDI} & JAL & \multirow{3}{*}{LI} & \multirow{3}{*}{LUI/ADDI16SP} & \multirow{3}{*}{MISC-ALU} & \multirow{3}{*}{J} & \multirow{3}{*}{BEQZ} & \multirow{3}{*}{BNEZ} & RV32 \\ + & & ADDIW & & & & & & & RV64 \\ + & & ADDIW & & & & & & & RV128 \\ \hline + \multirow{3}{*}{10} & \multirow{3}{*}{SLLI} & FLDSP & \multirow{3}{*}{LWSP} & FLWSP & \multirow{3}{*}{J[AL]R/MV/ADD} & FSDSP & \multirow{3}{*}{SWSP} & FSWSP & RV32 \\ + & & FLDSP & & LDSP & & FSDSP & & SDSP & RV64 \\ + & & LQSP & & LDSP & & SQSP & & SDSP & RV128 \\ \cline{1-9} + \cellcolor{gray} 11 & \multicolumn{8}{c|}{\cellcolor{gray} $>$16b} \\ \cline{1-9} + \end{tabular} +} +\end{center} +\vspace{-0.15in} +\caption{RVC opcode map} +\label{rvcopcodemap} +\end{table*} diff --git a/src/sbi.tex b/src/sbi.tex new file mode 100644 index 0000000..c4fdaa2 --- /dev/null +++ b/src/sbi.tex @@ -0,0 +1,77 @@ +\chapter{Supervisor Binary Interface (SBI)} + +This chapter is a placeholder to describe the form of the SBIs we're +envisioning for the RISC-V supervisor. + +The SBI captures the instructions that can be executed together with a +set of SBI calls out to the supervisor execution environment (SEE) on +a given platform. 
+ +Several features that might normally be handled by the supervisor +operating system (OS) directly are handled via SBI calls to the SEE in +RISC-V, including: + +\begin{itemize} + +\item Reset is handled by the SEE and once the machine is set up, the + OS kernel is mapped into virtual memory, and its entry point is called. + +\item Machine-check errors and other non-maskable interrupts are + handled by the SEE before vectoring into the OS if recovery is + possible. + +\item Some device drivers may be handled by the SEE, and managed via + virtual device calls over the SBI. + +\item The presence and version of supported instruction-set extensions + is obtained via an SBI call to return the configuration string + rather than a machine register. This allows for an arbitrarily + large definition of instruction set extensions, and simplifies + virtualization where the returned machine configuration might be + modified to emulate different architectures on a given hardware + platform. + +\end{itemize} + +The SBI employs the same calling convention as the ABI specified in Volume +I of this manual. SBI entry points are located in the uppermost +\wunits{2}{KiB} of the virtual address space, so that they may be invoked with +a single {\tt jalr} instruction with {\tt x0} as the base register. + +Table~\ref{sbicalls} gives a preliminary list of SBI calls. + +\begin{table*}[h!] +\begin{center} +\begin{tabular}{| >{\ttfamily\catcode`_=12}l|p{8cm}|} +\hline +const char* sbi_get_config(void); & \parbox{8cm}{Get pointer to + configuration string.}\\ \hline +size_t sbi_hart_id(void); & \parbox{8cm}{Get ID of current hart, in + range \\ {\tt [0, number\_harts - 1 ]}.} \\ \hline +int sbi_send_ipi(size_t hart_id); & Send an interprocessor interrupt; returns 0 on success or -1 if hart ID is invalid. \\ \hline +bool sbi_clear_ipi(void); & Clear local interprocessor interrupt. Returns 1 if an IPI was pending, else 0. 
\\ \hline +void sbi_shutdown(void); & Terminate this supervisor-mode process. \\ \hline +int sbi_console_putchar(uint8_t ch); & Write byte to debug console (blocking); returns 0 on success, else -1. \\ \hline +int sbi_console_getchar(void); & Read byte from debug console; returns the byte on success, or -1 for failure. \\ \hline + +& \multirow{4}{*}{\parbox{8cm}{Instruct other harts to execute SFENCE.VM. {\tt harts} points to a bitmask of remote hart IDs; NULL indicates all harts. {\tt asid} holds the address-space ID; 0 indicates all address spaces.}} \\ +void sbi_remote_sfence_vm( & \\ +\ \ const uintptr_t* harts, size_t asid); & \\ +& \\ \hline + +void sbi_remote_sfence_vm_range( +& \multirow{3}{*}{\parbox{8cm}{Like {\tt sbi\_remote\_sfence\_vm}, but only orders updates to leaf page tables mapping the range {\tt [start, start+size-1]}.}} \\ +\ \ const uintptr_t* harts, size_t asid, & \\ +\ \ uintptr_t start, uintptr_t size); & \\ \hline + +void sbi_remote_fence_i( +& \multirow{2}{*}{\parbox{8cm}{Instruct remote harts to execute FENCE.I. {\tt harts} is as with {\tt sbi\_remote\_sfence\_vm}.}} \\ +\ \ const uintptr_t* harts); & \\ \hline + +int sbi_mask_interrupt(int which); & Disable a PLIC interrupt line. Returns 0 if previously disabled, 1 if previously enabled, or -1 if {\tt which} is invalid. \\ \hline +int sbi_unmask_interrupt(int which); & Enable a PLIC interrupt line. Return value is as with {\tt sbi\_mask\_interrupt}. \\ \hline +\end{tabular} +\end{center} +\caption{SBI calls.} +\label{sbicalls} +\end{table*} diff --git a/src/supervisor.tex b/src/supervisor.tex new file mode 100644 index 0000000..2f0bb65 --- /dev/null +++ b/src/supervisor.tex @@ -0,0 +1,1148 @@ +\chapter{Supervisor-Level ISA} +\label{supervisor} + +This chapter describes the RISC-V supervisor-level architecture, which +contains a common core that is used with various supervisor-level +address translation and protection schemes. 
Supervisor-mode always +operates inside a virtual memory scheme defined by the VM field in the +machine-mode {\tt mstatus} register. Supervisor-level code is written +to a given VM scheme, and cannot change the VM scheme in use. + +Supervisor-level code relies on a supervisor execution environment to +initialize the environment and enter the supervisor code at an entry +point defined by the system binary interface (SBI). The SBI also +defines function entry points that provide supervisor environment +services for supervisor-level code. + +\begin{commentary} +Supervisor mode is deliberately restricted in terms of interactions +with underlying physical hardware, such as physical memory and device +interrupts, to support clean virtualization. A more conventional +virtualization-unfriendly operating system can be ported while +retaining a protected M-mode environment by using M-mode to unprotect +selected physical memory regions for access by the supervisor, and by +delegating selected device interrupts to S-mode. +\end{commentary} + +\section{Supervisor CSRs} + +A number of CSRs are provided for the supervisor. + +\begin{commentary} +The supervisor should only view CSR state that should be visible to a +supervisor-level operating system. In particular, there is no +information about the existence (or non-existence) of higher privilege +levels (hypervisor or machine) visible in the CSRs accessible by the +supervisor. + +Many supervisor CSRs are a subset of the equivalent machine-mode CSR, +and the machine-mode chapter should be read first to help understand +the supervisor-level CSR descriptions. +\end{commentary} + +\subsection{Supervisor Status Register ({\tt sstatus})} +\label{sstatus} + +The {\tt sstatus} register is an XLEN-bit read/write register +formatted as shown in Figure~\ref{sstatusreg}. The {\tt sstatus} +register keeps track of the processor's current operating state. + +\begin{figure*}[h!] 
+{\footnotesize +\begin{center} +\setlength{\tabcolsep}{4pt} +\begin{tabular}{cYccccWcWccWcc} +\\ +\instbit{XLEN-1} & +\instbitrange{XLEN-2}{19} & +\instbit{18} & +\instbit{17} & +\instbitrange{16}{15} & +\instbitrange{14}{13} & +\instbitrange{12}{9} & +\instbit{8} & +\instbitrange{7}{6} & +\instbit{5} & +\instbit{4} & +\instbitrange{3}{2} & +\instbit{1} & +\instbit{0} \\ +\hline +\multicolumn{1}{|c|}{SD} & +\multicolumn{1}{c|}{0} & +\multicolumn{1}{c|}{PUM} & +\multicolumn{1}{c|}{0} & +\multicolumn{1}{c|}{XS[1:0]} & +\multicolumn{1}{c|}{FS[1:0]} & +\multicolumn{1}{c|}{0} & +\multicolumn{1}{c|}{SPP} & +\multicolumn{1}{c|}{0} & +\multicolumn{1}{c|}{SPIE} & +\multicolumn{1}{c|}{UPIE} & +\multicolumn{1}{c|}{0} & +\multicolumn{1}{c|}{SIE} & +\multicolumn{1}{c|}{UIE} +\\ +\hline +1 & XLEN-20 & 1 & 1 & 2 & 2 & 4 & 1 & 2 & 1 & 1 & 2 & 1 & 1 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Supervisor-mode status Register.} +\label{sstatusreg} +\end{figure*} + +The SPP bit indicates the privilege level at which a hart was executing before +entering supervisor mode. When a trap is taken, SPP is set to 0 if the trap +originated from user mode, or 1 otherwise. When an SRET instruction +(see Section~\ref{otherpriv}) is executed to return from the trap handler, the +privilege level is set to user mode if the SPP bit is 0, or supervisor mode if +the SPP bit is 1; SPP is then set to 0. + +The SIE bit enables or disables all interrupts in supervisor mode. +When SIE is clear, interrupts are not taken while in supervisor mode. +When the hart is running in user-mode, the value in SIE is ignored, and +supervisor-level interrupts are enabled. The supervisor can disable +individual interrupt sources using the {\tt sie} register. + +The SPIE bit indicates whether interrupts were enabled before entering +supervisor mode. 
When a trap is taken into supervisor mode, SPIE is +set to either SIE or UIE depending on whether the trap was taken in +supervisor or user mode respectively, and SIE is set to 0. When an +SRET instruction is executed, if SPP=S, then SIE is set to SPIE; or +if SPP=U, then UIE is set to SPIE. In either case, SPIE is then set +to 1. + +The UIE bit enables or disables user-mode interrupts. User-level interrupts +are enabled only if UIE is set and the hart is running in user-mode. The UPIE +bit indicates whether user-level interrupts were enabled prior to taking +a user-level trap. When a URET instruction is executed, UIE is +set to UPIE, and UPIE is set to 1. User-level interrupts are optional. If +omitted, the UIE and UPIE bits are hardwired to zero. + +\begin{commentary} +The {\tt sstatus} register is a subset of the {\tt mstatus} register. In +a straightforward implementation, reading or writing any field in {\tt +sstatus} is equivalent to reading or writing the homonymous field in +{\tt mstatus}. +\end{commentary} + +\subsection{Memory Privilege in {\tt sstatus} Register} +\label{sec:pum} + +The PUM (Protect User Memory) bit modifies the privilege with which S-mode +loads, stores, and instruction fetches access virtual memory. When PUM=0, +translation and protection behave as normal. When PUM=1, S-mode memory +accesses to pages that are accessible by U-mode (U=1 in Figure~\ref{sv32pte}) +will fault. PUM has no effect when executing in U-mode. + +\begin{commentary} +The PUM mechanism prevents supervisor software from inadvertently accessing +user memory. Operating systems can execute the majority of code with PUM set; +the few code segments that should access user memory can temporarily clear +PUM. +\end{commentary} + +\subsection{Supervisor Trap Vector Base Address Register ({\tt stvec})} + +The {\tt stvec} register is an XLEN-bit read/write register that holds the +base address of the S-mode trap vector. 
When an exception occurs, the {\tt +pc} is set to {\tt stvec}. The {\tt stvec} register is always aligned to +a 4-byte boundary. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{J@{}F} +\instbitrange{XLEN-1}{2} & +\instbitrange{1}{0} \\ +\hline +\multicolumn{1}{|c|}{\tt Trap Vector Base Address} & +\multicolumn{1}{c|}{0} \\ +\hline +XLEN-2 & 2 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Supervisor trap vector base address register ({\tt stvec}).} +\label{stvecreg} +\end{figure*} + +\subsection{Supervisor Interrupt Registers ({\tt sip} and {\tt sie})} + +The {\tt sip} register is an XLEN-bit read/write register containing +information on pending interrupts, while {\tt sie} is the corresponding +XLEN-bit read/write register containing interrupt enable bits. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\setlength{\tabcolsep}{4pt} +\begin{tabular}{EccFccFcc} +\instbitrange{XLEN-1}{10} & +\instbit{9} & +\instbit{8} & +\instbitrange{7}{6} & +\instbit{5} & +\instbit{4} & +\instbitrange{3}{2} & +\instbit{1} & +\instbit{0} \\ +\hline +\multicolumn{1}{|c|}{0} & +\multicolumn{1}{c|}{SEIP} & +\multicolumn{1}{c|}{UEIP} & +\multicolumn{1}{c|}{0} & +\multicolumn{1}{c|}{STIP} & +\multicolumn{1}{c|}{UTIP} & +\multicolumn{1}{c|}{0} & +\multicolumn{1}{c|}{SSIP} & +\multicolumn{1}{c|}{USIP} \\ +\hline +XLEN-10 & 1 & 1 & 2 & 1 & 1 & 2 & 1 & 1 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Supervisor interrupt-pending register ({\tt sip}).} +\label{sipreg} +\end{figure*} + +\begin{figure*}[h!] 
+{\footnotesize +\begin{center} +\setlength{\tabcolsep}{4pt} +\begin{tabular}{EccFccFcc} +\instbitrange{XLEN-1}{10} & +\instbit{9} & +\instbit{8} & +\instbitrange{7}{6} & +\instbit{5} & +\instbit{4} & +\instbitrange{3}{2} & +\instbit{1} & +\instbit{0} \\ +\hline +\multicolumn{1}{|c|}{0} & +\multicolumn{1}{c|}{SEIE} & +\multicolumn{1}{c|}{UEIE} & +\multicolumn{1}{c|}{0} & +\multicolumn{1}{c|}{STIE} & +\multicolumn{1}{c|}{UTIE} & +\multicolumn{1}{c|}{0} & +\multicolumn{1}{c|}{SSIE} & +\multicolumn{1}{c|}{USIE} \\ +\hline +XLEN-10 & 1 & 1 & 2 & 1 & 1 & 2 & 1 & 1 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Supervisor interrupt-enable register ({\tt sie}).} +\label{siereg} +\end{figure*} + +Three types of interrupts are defined: software interrupts, timer interrupts, +and external interrupts. A supervisor-level software interrupt is triggered +on the current hart by writing 1 to its supervisor software interrupt-pending +(SSIP) bit in the {\tt sip} register. A pending supervisor-level software +interrupt can be cleared by writing 0 to the SSIP bit in {\tt sip}. +Supervisor-level software interrupts are disabled when the SSIE bit in the +{\tt sie} register is clear. + +Interprocessor interrupts are sent to other harts by means of SBI +calls, which will ultimately cause the SSIP bit to be set in the +recipient hart's {\tt sip} register. + +A user-level software interrupt is triggered on the current hart by writing +1 to its user software interrupt-pending (USIP) bit in the {\tt sip} register. +A pending user-level software interrupt can be cleared by writing 0 to the +USIP bit in {\tt sip}. User-level software interrupts are disabled when the +USIE bit in the {\tt sie} register is clear. If user-level interrupts are not +supported, USIP and USIE are hardwired to zero. + +All bits besides SSIP and USIP in the {\tt sip} register are read-only. + +A supervisor-level timer interrupt is pending if the STIP bit in the {\tt sip} +register is set. 
Supervisor-level timer interrupts are disabled when the STIE +bit in the {\tt sie} register is clear. An SBI call to the SEE may be used to +clear the pending timer interrupt. + +A user-level timer interrupt is pending if the UTIP bit in the {\tt sip} +register is set. User-level timer interrupts are disabled when the UTIE bit +in the {\tt sie} register is clear. If user-level interrupts are supported, +the ABI should provide a facility for scheduling timer interrupts in terms of +real-time counter values. If user-level interrupts are not supported, UTIP +and UTIE are hardwired to zero. + +A supervisor-level external interrupt is pending if the SEIP bit in the +{\tt sip} register is set. Supervisor-level external interrupts are disabled +when the SEIE bit in the {\tt sie} register is clear. The SBI should provide +facilities to mask, unmask, and query the cause of external interrupts. + +A user-level external interrupt is pending if the UEIP bit in the +{\tt sip} register is set. User-level external interrupts are disabled +when the UEIE bit in the {\tt sie} register is clear. If user-level +interrupts are not supported, UEIP and UEIE are hardwired to zero. + +\begin{commentary} +The {\tt sip} and {\tt sie} registers are subsets of the {\tt mip} and {\tt +mie} registers. Reading any field, or writing any writable field, of {\tt +sip}/{\tt sie} effects a read or write of the homonymous field of {\tt +mip}/{\tt mie}. +\end{commentary} + +\subsection{Supervisor Timers and Performance Counters} + +Supervisor software uses the same hardware performance monitoring facility +as user-mode software, including the {\tt time}, {\tt cycle}, and {\tt instret} +CSRs. The SBI should provide a mechanism to modify the +counter values. + +The SBI must provide a facility for scheduling timer interrupts in terms +of the real-time counter, {\tt time}. 
+ +\subsection{Supervisor Scratch Register ({\tt sscratch})} + +The {\tt sscratch} register is an XLEN-bit read/write register, +dedicated for use by the supervisor. Typically, {\tt sscratch} is +used to hold a pointer to the hart-local supervisor context while the +hart is executing user code. At the beginning of a trap handler, {\tt + sscratch} is swapped with a user register to provide an initial +working register. + +\begin{figure}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}J} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{\tt sscratch} \\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Supervisor Scratch Register.} +\label{kregs} +\end{figure} + +\subsection{Supervisor Exception Program Counter ({\tt sepc})} + +{\tt sepc} is an XLEN-bit read/write register formatted as shown in +Figure~\ref{epcreg}. The low bit of {\tt sepc} ({\tt sepc[0]}) is +always zero. On implementations that do not support instruction-set +extensions with 16-bit instruction alignment, the two low bits ({\tt + sepc[1:0]}) are always zero. + +When a trap is taken, {\tt sepc} is written with +the virtual address of the instruction that encountered the exception. + +\begin{figure}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}J} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{\tt sepc} \\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Supervisor exception program counter register.} +\label{epcreg} +\end{figure} + +\subsection{Supervisor Cause Register ({\tt scause})} + +The {\tt scause} register is an XLEN-bit read-write register formatted +as shown in Figure~\ref{scausereg}. The Interrupt bit is set if the +exception was caused by an interrupt. The Exception Code field +contains a code identifying the last exception. Table~\ref{scauses} +lists the possible exception codes for the current supervisor ISAs, in +descending order of priority. 
The Exception Code is an \wlrl\ field, +so is only guaranteed to hold supported exception codes. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{c@{}U} +\instbit{XLEN-1} & +\instbitrange{XLEN-2}{0} \\ +\hline +\multicolumn{1}{|c|}{Interrupt} & +\multicolumn{1}{c|}{Exception Code (\wlrl)} \\ +\hline +1 & XLEN-1 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Supervisor Cause register {\tt scause}.} +\label{scausereg} +\end{figure*} + +\begin{table*}[h!] +\begin{center} +\begin{tabular}{|r|r|l|} + + \hline + Interrupt & Exception Code & Description \\ + \hline + 1 & 0 & User software interrupt \\ + 1 & 1 & Supervisor software interrupt \\ + 1 & 2--3 & {\em Reserved} \\ + 1 & 4 & User timer interrupt \\ + 1 & 5 & Supervisor timer interrupt \\ + 1 & 6--7 & {\em Reserved} \\ + 1 & 8 & User external interrupt \\ + 1 & 9 & Supervisor external interrupt \\ + 1 & $\ge$10 & {\em Reserved} \\ \hline + 0 & 0 & Instruction address misaligned \\ + 0 & 1 & Instruction access fault \\ + 0 & 2 & Illegal instruction \\ + 0 & 3 & Breakpoint \\ + 0 & 4 & {\em Reserved} \\ + 0 & 5 & Load access fault \\ + 0 & 6 & AMO address misaligned \\ + 0 & 7 & Store/AMO access fault \\ + 0 & 8 & Environment call \\ + 0 & $\ge$9 & {\em Reserved} \\ \hline + +\end{tabular} +\end{center} +\caption{Supervisor cause register ({\tt scause}) values after trap.} +\label{scauses} +\end{table*} + +\subsection{Supervisor Bad Address ({\tt sbadaddr}) Register} + +{\tt sbadaddr} is an XLEN-bit read/write register formatted as shown in +Figure~\ref{badvaddrreg}. When a hardware breakpoint is triggered, or +an instruction-fetch, load, or store access exception occurs, +or an instruction-fetch or AMO address-misaligned exception occurs, +{\tt sbadaddr} is written with the faulting address. {\tt sbadaddr} +is not modified for other exceptions. + +\begin{figure}[h!] 
+{\footnotesize +\begin{center} +\begin{tabular}{@{}J} +\instbitrange{XLEN-1}{0} \\ +\hline +\multicolumn{1}{|c|}{\tt sbadaddr} \\ +\hline +XLEN \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Supervisor bad address register.} +\label{badvaddrreg} +\end{figure} + +For instruction fetch access faults on RISC-V systems with +variable-length instructions, {\tt sbadaddr} will point to the portion +of the instruction that caused the fault while {\tt sepc} will point +to the beginning of the instruction. + +\subsection{Supervisor Page-Table Base Register ({\tt sptbr})} + +The {\tt sptbr} register is an XLEN-bit read/write register formatted as shown +in Figure~\ref{rv32ptbrreg} for RV32 and Figure~\ref{rv64ptbrreg} for RV64. The {\tt +sptbr} register is only present on systems supporting paged virtual-memory +systems. This register holds the physical page number (PPN) of the root page +table, i.e., its supervisor physical address divided by \wunits{4}{KiB}, and +an address space identifier (ASID), which facilitates address-translation +fences on a per-address-space basis. + +The number of supervisor physical address bits is implementation-defined; any +unimplemented address bits are hardwired to zero in the {\tt sptbr} register. +The number of ASID bits is also implementation-defined and may be zero. The +number of implemented ASID bits may be determined by writing one to every bit +position in the ASID field, then reading back the value in {\tt sptbr} to see +which bit positions in the ASID field hold a one. + +\begin{commentary} +We store the ASID and the page table base address in the same CSR to allow the +pair to be changed atomically on a context switch. Swapping them +non-atomically could pollute the old virtual address space with new +translations, or vice-versa. This approach also slightly reduces the cost of +a context switch. +\end{commentary} + +\begin{figure}[h!] 
+{\footnotesize +\begin{center} +\begin{tabular}{@{}S@{}M} +\instbitrange{31}{22} & +\instbitrange{21}{0} \\ +\hline +\multicolumn{1}{|c|}{{\tt ASID} (\warl)} & +\multicolumn{1}{|c|}{{\tt PPN} (\warl)} \\ +\hline +10 & 22 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{RV32 Supervisor Page-Table Base Register {\tt sptbr}.} +\label{rv32ptbrreg} +\end{figure} + +\begin{commentary} +Storing a PPN in {\tt sptbr}, rather than a physical address, supports +physical address spaces larger than $2^{\mbox{\scriptsize XLEN}}$. +\end{commentary} + +\begin{figure}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}E@{}U} +\instbitrange{63}{38} & +\instbitrange{37}{0} \\ +\hline +\multicolumn{1}{|c|}{{\tt ASID} (\warl)} & +\multicolumn{1}{|c|}{{\tt PPN} (\warl)} \\ +\hline +26 & 38 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{RV64 Supervisor Page-Table Base Register {\tt sptbr}.} +\label{rv64ptbrreg} +\end{figure} + +\begin{commentary} +For many applications, the choice of page size has a substantial +performance impact. A large page size increases TLB reach and loosens +the associativity constraints on virtually-indexed, physically-tagged +caches. At the same time, large pages exacerbate internal +fragmentation, wasting physical memory and possibly cache capacity. + +After much deliberation, we have settled on a conventional page size +of 4 KiB for both RV32 and RV64. We expect this decision to ease the +porting of low-level runtime software and device drivers. The TLB +reach problem is ameliorated by transparent superpage support in +modern operating systems~\cite{transparent-superpages}. Additionally, +multi-level TLB hierarchies are quite inexpensive relative to the +multi-level cache hierarchies whose address space they map. +\end{commentary} + +\section{Supervisor Instructions} + +In addition to the SRET instruction defined in +Section~\ref{otherpriv}, one new supervisor-level instruction is +provided. 
+ +\subsection{Supervisor Memory-Management Fence Instruction} +\label{sec:sfence.vm} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{M@{}R@{}F@{}R@{}S} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct12} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{funct3} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +SFENCE.VM & vaddr & PRIV & 0 & SYSTEM \\ +\end{tabular} +\end{center} + +The supervisor memory-management fence instruction SFENCE.VM is used to +synchronize updates to in-memory memory-management data structures with +current execution. Instruction execution causes implicit reads and writes to +these data structures; however, these implicit references are ordinarily not +ordered with respect to loads and stores in the instruction stream. Executing +an SFENCE.VM instruction guarantees that any stores in the instruction stream +prior to the SFENCE.VM are ordered before all implicit references subsequent +to the SFENCE.VM. Furthermore, executing an SFENCE.VM guarantees that any +implicit writes caused by instructions prior to the SFENCE.VM are ordered +before all loads and stores subsequent to the SFENCE.VM. + +\begin{commentary} +The SFENCE.VM is used to flush any local hardware caches related to +address translation. It is specified as a fence rather than a TLB +flush to provide cleaner semantics with respect to which instructions +are affected by the flush operation and to support a wider variety of +dynamic caching structures and memory-management schemes. SFENCE.VM +is also used by higher privilege levels to synchronize page table +writes and the address translation hardware. +\end{commentary} + +\begin{commentary} +Note the instruction has no effect on the translations of other RISC-V +threads, which must be notified separately. 
One approach is to use 1) +a local data fence to ensure local writes are visible globally, then +2) an interprocessor interrupt to the other thread, then 3) a local +SFENCE.VM in the interrupt handler of the remote thread, and finally +4) signal back to originating thread that operation is complete. This +is, of course, the RISC-V analog to a TLB shootdown. Alternatively, +implementations might provide direct hardware support for remote TLB +invalidation. TLB shootdowns are handled by an SBI call to hide +implementation details. +\end{commentary} + +The behavior of SFENCE.VM depends on the current value of the ASID field in +the {\tt sptbr} register. If ASID is nonzero, SFENCE.VM takes effect only for +address translations in the current address space. If ASID is zero, +SFENCE.VM affects address translations for all address spaces. In this case, +it also affects {\em global} mappings, which are described in +Section~\ref{sec:translation}. + +The register operand {\em rs1} contains an optional virtual address +argument. If {\em rs1}={\tt x0}, the fence affects all virtual +address translations and stores made to any level of the page tables. + +For the common case that the translation data structures have only +been modified for a single address mapping (i.e., one page or superpage), +{\em rs1} can specify a virtual address within +that mapping to effect a translation fence for that mapping only. +When {\em rs1}$\neq${\tt x0}, the SFENCE.VM orders only stores to the +leaf page table entry corresponding to the virtual address in {\em rs1}, +and not any stores to other page table entries. + +\begin{commentary} +Simpler implementations can ignore the ASID value in {\tt sptbr} and the +virtual address in {\em rs1} and always perform a global fence. 
+\end{commentary} + +\section{Supervisor Operation in Mbare Environment} + +When the Mbare environment is selected in the VM field of {\tt + mstatus} (Section~\ref{sec:vm}), supervisor-mode virtual addresses +are truncated and mapped directly to supervisor-mode physical +addresses. Supervisor physical addresses are then checked using any +physical memory protection structures (Section~\ref{sec:pmp}), before +being directly converted to machine-level physical addresses. + +\section{Supervisor Operation in Base and Bounds Environments} + +When Mbb or Mbbid are selected in the VM field of {\tt mstatus} +(Section~\ref{sec:vm}), supervisor-mode virtual addresses are +translated and checked according to the appropriate machine-level base +and bound registers. The resulting supervisor-level physical +addresses are then checked using any physical memory protection +structures (Section~\ref{sec:pmp}), before being directly converted to +machine-level physical addresses. + +\section{Sv32: Page-Based 32-bit Virtual-Memory Systems} + +When Sv32 is written to the VM field in the {\tt mstatus} register, +the supervisor operates in a 32-bit paged virtual-memory system. Sv32 +is supported on RV32 systems and is designed to include mechanisms +sufficient for supporting modern Unix-based operating systems. + +\begin{commentary} +The initial RISC-V paged virtual-memory architectures have been +designed as straightforward implementations to support existing +operating systems. We have architected page table layouts to support +a hardware page-table walker. Software TLB refills are a performance +bottleneck on high-performance systems, and are especially troublesome +with decoupled specialized coprocessors. An implementation can choose +to implement software TLB refills using a machine-mode trap handler as +an extension to M-mode. 
+\end{commentary} + +\subsection{Addressing and Memory Protection} +\label{sec:translation} + +Sv32 implementations support a 32-bit virtual address space, divided +into \wunits{4}{KiB} pages. An Sv32 virtual address is partitioned +into a virtual page number (VPN) and page offset, as shown in +Figure~\ref{sv32va}. When Sv32 virtual memory mode is selected in the +VM field of the {\tt mstatus} register, supervisor virtual addresses +are translated into supervisor physical addresses via a two-level page +table. The 20-bit VPN is translated into a 22-bit physical page +number (PPN), while the 12-bit page offset is untranslated. The +resulting supervisor-level physical addresses are then checked using +any physical memory protection structures (Section~\ref{sec:pmp}), +before being directly converted to machine-level physical addresses. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}O@{}O@{}E} +\instbitrange{31}{22} & +\instbitrange{21}{12} & +\instbitrange{11}{0} \\ +\hline +\multicolumn{1}{|c|}{VPN[1]} & +\multicolumn{1}{c|}{VPN[0]} & +\multicolumn{1}{c|}{page offset} \\ +\hline +10 & 10 & 12 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Sv32 virtual address.} +\label{sv32va} +\end{figure*} + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}E@{}O@{}E} +\instbitrange{33}{22} & +\instbitrange{21}{12} & +\instbitrange{11}{0} \\ +\hline +\multicolumn{1}{|c|}{PPN[1]} & +\multicolumn{1}{c|}{PPN[0]} & +\multicolumn{1}{c|}{page offset} \\ +\hline +12 & 10 & 12 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Sv32 physical address.} +\label{rv32va} +\end{figure*} + +\begin{figure*}[h!] 
+{\footnotesize +\begin{center} +\begin{tabular}{@{}E@{}O@{}Occcccccc} +\instbitrange{31}{20} & +\instbitrange{19}{10} & +\instbitrange{9}{8} & +\instbit{7} & +\instbit{6} & +\instbit{5} & +\instbit{4} & +\instbit{3} & +\instbit{2} & +\instbit{1} & +\instbit{0} \\ +\hline +\multicolumn{1}{|c|}{PPN[1]} & +\multicolumn{1}{c|}{PPN[0]} & +\multicolumn{1}{c|}{\it Reserved for software} & +\multicolumn{1}{c|}{D} & +\multicolumn{1}{c|}{A} & +\multicolumn{1}{c|}{G} & +\multicolumn{1}{c|}{U} & +\multicolumn{1}{c|}{X} & +\multicolumn{1}{c|}{W} & +\multicolumn{1}{c|}{R} & +\multicolumn{1}{c|}{V} \\ +\hline +12 & 10 & 2 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1\\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Sv32 page table entry.} +\label{sv32pte} +\end{figure*} + +Sv32 page tables consist of $2^{10}$ page-table entries (PTEs), each +of four bytes. A page table is exactly the size of a page and must +always be aligned to a page boundary. The physical page number of the +root page table is stored in the {\tt sptbr} register. + +The PTE format for Sv32 is shown in Figure~\ref{sv32pte}. The V bit +indicates whether the PTE is valid; if it is 0, bits 31--1 of the PTE are +don't-cares and may be used freely by software. The permission bits, R, W, +and X, indicate whether the page is readable, writable, and executable, +respectively. When all three are zero, the PTE is a pointer to the next level +of the page table; otherwise, it is a leaf PTE. Writable pages must also be +marked readable; the contrary combinations are reserved for future use. +Table~\ref{pteperm} summarizes the encoding of the permission bits. + +\begin{table*}[h!] +\begin{center} +\begin{tabular}{|c|c|c||l|} +\hline +X & W & R & Meaning \\ +\hline +0 & 0 & 0 & Pointer to next level of page table. \\ +0 & 0 & 1 & Read-only page. \\ +0 & 1 & 0 & {\em Reserved for future use.} \\ +0 & 1 & 1 & Read-write page. \\ +1 & 0 & 0 & Execute-only page. \\ +1 & 0 & 1 & Read-execute page. 
\\ +1 & 1 & 0 & {\em Reserved for future use.} \\ +1 & 1 & 1 & Read-write-execute page. \\ +\hline +\end{tabular} +\end{center} +\caption{Encoding of PTE R/W/X fields.} +\label{pteperm} +\end{table*} + +The U bit indicates whether the page is accessible to user mode. +U-mode software may only access the page when U=1. If the PUM bit +in the {\tt sstatus} register is +clear, supervisor mode software may also access pages with U=1. +However, supervisor code normally operates with the PUM bit set, in +which case, supervisor code will fault on accesses to user-mode pages. + +\begin{commentary} +An alternative PTE format would support different permissions for supervisor +and user. We omitted this feature because it would be largely redundant with +the PUM mechanism (see Section~\ref{sec:pum}) and would require more encoding +space in the PTE. +\end{commentary} + +The G bit designates a {\em global} mapping. Global mappings are those that +exist in all address spaces. For non-leaf PTEs, the global setting implies +that all mappings in the subsequent levels of the page table are global. Note +that failing to mark a global mapping as global merely reduces performance, +whereas marking a non-global mapping as global is an error. + +\begin{commentary} +Global mappings were devised to reduce the cost of context switches. They +need not be flushed from an implementation's address translation caches when +an SFENCE.VM instruction is executed with a nonzero ASID value in {\tt sptbr}. +\end{commentary} + +Each leaf PTE maintains an accessed (A) and dirty (D) bit. When a +virtual page is read, written, or fetched from, the implementation +sets the A bit in the corresponding PTE. When a virtual page is +written, the implementation additionally sets the D bit in the +corresponding PTE. The PTE updates are exact and are observed in +program order by the local hart. 
The ordering on loads and stores +provided by FENCE instructions and the acquire/release bits on atomic +instructions also orders the PTE updates associated with those loads +and stores as observed by remote harts. + +\begin{commentary} +We have changed the behavior of the PTE updates to be exact and in +program order on a hart. This significantly simplifies the +specification, and can be implemented with high performance. + +The A and D bits are never cleared by the implementation. If the +supervisor software does not rely on accessed and/or dirty bits, +e.g. if it does not swap memory pages to secondary storage or if the +pages are being used to map I/O space, it should always set them to 1 +in the PTE. The implementation can then avoid issuing memory accesses +to set the bits. +\end{commentary} + +Any level of PTE may be a leaf PTE, so in addition to 4 KiB pages, Sv32 +supports 4 MiB {\em megapages}. A megapage must be virtually and +physically aligned to a 4 MiB boundary. + +\subsection{Virtual Address Translation Process} +\label{sv32algorithm} + +A virtual address $va$ is translated into a physical address $pa$ as +follows: + +\begin{enumerate} + +\item Let $a$ be ${\tt sptbr}.ppn \times \textrm{PAGESIZE}$, and let $i=\textrm{LEVELS} - 1$. (For Sv32, PAGESIZE=$2^{12}$ and LEVELS=2.) + +\item Let $pte$ be the value of the PTE at address $a+va.vpn[i]\times \textrm{PTESIZE}$. (For Sv32, PTESIZE=4.) + +\item If $pte.v=0$, or if $pte.r=0$ and $pte.w=1$, stop and raise an access exception. + +\item Otherwise, the PTE is valid. +If $pte.r=1$ or $pte.x=1$, go to step 5. +Otherwise, this PTE is a pointer to the next level of the page table. Let +$i=i-1$. If $i<0$, stop and raise an access exception. Otherwise, let +$a=pte.ppn \times \textrm{PAGESIZE}$ and go to step 2. + +\item A leaf PTE has been found. Determine if the requested memory access is +allowed by the $pte.r$, $pte.w$, and $pte.x$ bits. +If not, stop and raise an access exception. 
+Otherwise, the translation is successful. Set $pte.a$ to 1, and, if the +memory access is a store, set $pte.d$ to 1. The translated physical address is +given as follows: +\begin{itemize} +\item $\textit{pa.pgoff} = \textit{va.pgoff}$. +\item If $i>0$, then this is a superpage translation and $pa.ppn[i-1:0]=va.vpn[i-1:0]$. +\item $pa.ppn[\textrm{LEVELS} - 1:i] = pte.ppn[\textrm{LEVELS} - 1:i]$. +\end{itemize} + +\end{enumerate} + +\section{Sv39: Page-Based 39-bit Virtual-Memory System} + +This section describes a simple paged virtual-memory system designed +for RV64 systems, which supports 39-bit virtual address spaces. The +design of Sv39 follows the overall scheme of Sv32, and this section +details only the differences between the schemes. + +\subsection{Addressing and Memory Protection} + +Sv39 implementations support a 39-bit virtual address space, divided +into \wunits{4}{KiB} pages. An Sv39 address is partitioned as +shown in Figure~\ref{sv39va}. Load and store effective addresses, +which are 64 bits, must have bits 63--39 all equal to bit 38, or else +an access fault will occur. The 27-bit VPN is translated into a +38-bit PPN via a three-level page table, while the 12-bit page offset +is untranslated. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}O@{}O@{}O@{}O} +\instbitrange{38}{30} & +\instbitrange{29}{21} & +\instbitrange{20}{12} & +\instbitrange{11}{0} \\ +\hline +\multicolumn{1}{|c|}{VPN[2]} & +\multicolumn{1}{c|}{VPN[1]} & +\multicolumn{1}{c|}{VPN[0]} & +\multicolumn{1}{c|}{page offset} \\ +\hline +9 & 9 & 9 & 12 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Sv39 virtual address.} +\label{sv39va} +\end{figure*} + +\begin{figure*}[h!] 
+{\footnotesize +\begin{center} +\begin{tabular}{@{}T@{}O@{}O@{}O} +\instbitrange{49}{30} & +\instbitrange{29}{21} & +\instbitrange{20}{12} & +\instbitrange{11}{0} \\ +\hline +\multicolumn{1}{|c|}{PPN[2]} & +\multicolumn{1}{c|}{PPN[1]} & +\multicolumn{1}{c|}{PPN[0]} & +\multicolumn{1}{c|}{page offset} \\ +\hline +20 & 9 & 9 & 12 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Sv39 physical address.} +\label{sv39pa} +\end{figure*} + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}Y@{}Y@{}Y@{}Y@{}Scccccccc} +\instbitrange{63}{48} & +\instbitrange{47}{28} & +\instbitrange{27}{19} & +\instbitrange{18}{10} & +\instbitrange{9}{8} & +\instbit{7} & +\instbit{6} & +\instbit{5} & +\instbit{4} & +\instbit{3} & +\instbit{2} & +\instbit{1} & +\instbit{0} \\ +\hline +\multicolumn{1}{|c|}{\it Reserved} & +\multicolumn{1}{c|}{PPN[2]} & +\multicolumn{1}{c|}{PPN[1]} & +\multicolumn{1}{c|}{PPN[0]} & +\multicolumn{1}{c|}{\it Reserved for SW} & +\multicolumn{1}{c|}{D} & +\multicolumn{1}{c|}{A} & +\multicolumn{1}{c|}{G} & +\multicolumn{1}{c|}{U} & +\multicolumn{1}{c|}{X} & +\multicolumn{1}{c|}{W} & +\multicolumn{1}{c|}{R} & +\multicolumn{1}{c|}{V} \\ +\hline +16 & 20 & 9 & 9 & 2 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1\\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Sv39 page table entry.} +\label{sv39pte} +\end{figure*} + +Sv39 page tables contain $2^9$ page table entries (PTEs), eight +bytes each. A page table is exactly the size of a page and must +always be aligned to a page boundary. The physical page number of the +root page table is stored in the {\tt sptbr} register. + +The PTE format for Sv39 is shown in Figure~\ref{sv39pte}. Bits 9--0 +have the same meaning as for Sv32. Bits 63--48 are reserved +for future use and must be zeroed by software for forward compatibility. 
+ +\begin{commentary} +We reserved several PTE bits for a possible extension that improves +support for sparse address spaces by allowing page-table levels to be +skipped, reducing memory usage and TLB refill latency. These reserved +bits may also be used to facilitate research experimentation. The +cost is reducing the physical address space, but \wunits{1}{PiB} is +presently ample. If at some point it no longer suffices, the reserved +bits that remain unallocated could be used to expand the physical +address space. +\end{commentary} + +Any level of PTE may be a leaf PTE, so in addition to \wunits{4}{KiB} +pages, Sv39 supports \wunits{2}{MiB} {\em megapages} and +\wunits{1}{GiB} {\em gigapages}, each of which must be virtually and +physically aligned to a boundary equal to its size. + +The algorithm for virtual-to-physical address translation is the same as in +Section~\ref{sv32algorithm}, except LEVELS equals 3 and PTESIZE equals 8. + +\section{Sv48: Page-Based 48-bit Virtual-Memory System} + +This section describes a simple paged virtual-memory system designed +for RV64 systems, which supports 48-bit virtual address spaces. Sv48 +is intended for systems for which a 39-bit virtual address space is +insufficient. It closely follows the design of Sv39, simply adding an +additional level of page table, and so this chapter only details the +differences between the two schemes. + +Implementations that support Sv48 should also support Sv39. + +\begin{commentary} +We specified two virtual memory systems for RV64 to relieve the +tension between providing a large address space and minimizing +address-translation cost. For many systems, \wunits{512}{GiB} of +virtual-address space is ample, and so Sv39 suffices. Sv48 increases +the virtual address space to \wunits{256}{TiB} but increases the +physical memory capacity dedicated to page tables, the latency of +page-table traversals, and the size of hardware structures that store +virtual addresses. 
+ +Systems that support Sv48 can also support Sv39 at essentially no cost, +and so should do so to support supervisor software that assumes Sv39. +\end{commentary} + +\subsection{Addressing and Memory Protection} + +Sv48 implementations support a 48-bit virtual address space, divided +into \wunits{4}{KiB} pages. An Sv48 address is partitioned as +shown in Figure~\ref{sv48va}. Load and store effective addresses, +which are 64 bits, must have bits 63--48 all equal to bit 47, or else +an access fault will occur. The 36-bit VPN is translated into a +38-bit PPN via a four-level page table, while the 12-bit page offset +is untranslated. + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}O@{}O@{}O@{}O@{}O} +\instbitrange{47}{39} & +\instbitrange{38}{30} & +\instbitrange{29}{21} & +\instbitrange{20}{12} & +\instbitrange{11}{0} \\ +\hline +\multicolumn{1}{|c|}{VPN[3]} & +\multicolumn{1}{c|}{VPN[2]} & +\multicolumn{1}{c|}{VPN[1]} & +\multicolumn{1}{c|}{VPN[0]} & +\multicolumn{1}{c|}{page offset} \\ +\hline +9 & 9 & 9 & 9 & 12 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Sv48 virtual address.} +\label{sv48va} +\end{figure*} + +\begin{figure*}[h!] +{\footnotesize +\begin{center} +\begin{tabular}{@{}E@{}O@{}O@{}O@{}O} +\instbitrange{49}{39} & +\instbitrange{38}{30} & +\instbitrange{29}{21} & +\instbitrange{20}{12} & +\instbitrange{11}{0} \\ +\hline +\multicolumn{1}{|c|}{PPN[3]} & +\multicolumn{1}{c|}{PPN[2]} & +\multicolumn{1}{c|}{PPN[1]} & +\multicolumn{1}{c|}{PPN[0]} & +\multicolumn{1}{c|}{page offset} \\ +\hline +11 & 9 & 9 & 9 & 12 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Sv48 physical address.} +\label{sv48pa} +\end{figure*} + +\begin{figure*}[h!] 
+{\footnotesize +\begin{center} +\begin{tabular}{@{}Y@{}Y@{}Y@{}Y@{}Y@{}Fcccccccc} +\instbitrange{63}{48} & +\instbitrange{47}{37} & +\instbitrange{36}{28} & +\instbitrange{27}{19} & +\instbitrange{18}{10} & +\instbitrange{9}{8} & +\instbit{7} & +\instbit{6} & +\instbit{5} & +\instbit{4} & +\instbit{3} & +\instbit{2} & +\instbit{1} & +\instbit{0} \\ +\hline +\multicolumn{1}{|c|}{\it Reserved} & +\multicolumn{1}{c|}{PPN[3]} & +\multicolumn{1}{c|}{PPN[2]} & +\multicolumn{1}{c|}{PPN[1]} & +\multicolumn{1}{c|}{PPN[0]} & +\multicolumn{1}{c|}{\it Res. SW} & +\multicolumn{1}{c|}{D} & +\multicolumn{1}{c|}{A} & +\multicolumn{1}{c|}{G} & +\multicolumn{1}{c|}{U} & +\multicolumn{1}{c|}{X} & +\multicolumn{1}{c|}{W} & +\multicolumn{1}{c|}{R} & +\multicolumn{1}{c|}{V} \\ +\hline +16 & 11 & 9 & 9 & 9 & 2 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1\\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Sv48 page table entry.} +\label{sv48pte} +\end{figure*} + +The PTE format for Sv48 is shown in Figure~\ref{sv48pte}. Bits 9--0 +have the same meaning as for Sv32. Any level of PTE may be a leaf +PTE, so in addition to \wunits{4}{KiB} pages, Sv48 supports +\wunits{2}{MiB} {\em megapages}, \wunits{1}{GiB} {\em gigapages}, and +\wunits{512}{GiB} {\em terapages}, each of which must be virtually and +physically aligned to a boundary equal to its size. + +The algorithm for virtual-to-physical address translation is the same +as in Section~\ref{sv32algorithm}, except LEVELS equals 4 and PTESIZE +equals 8. diff --git a/src/t.tex b/src/t.tex new file mode 100644 index 0000000..103c045 --- /dev/null +++ b/src/t.tex @@ -0,0 +1,16 @@ +\chapter{``T'' Standard Extension for Transactional Memory, Version 0.0} +\label{sec:bits} + +This chapter is a placeholder for a future standard extension to +provide transactional memory operations. 
+ +\begin{commentary} +Despite much research over the last twenty years, and initial +commercial implementations, there is still much debate on the best way +to support atomic operations involving multiple addresses. + +Our current thoughts are to include a small limited-capacity +transactional memory buffer along the lines of the original +transactional memory proposals. +\end{commentary} + diff --git a/src/v.tex b/src/v.tex new file mode 100644 index 0000000..29d4144 --- /dev/null +++ b/src/v.tex @@ -0,0 +1,749 @@ +\chapter{``V'' Standard Extension for Vector Operations, Version 0.1} +\label{sec:bits} + +This chapter presents a proposal for the RISC-V vector instruction set +extension. The vector extension supports a configurable vector unit, +to trade off the number of architectural vector registers and supported +element widths against available maximum vector length. The vector +extension is designed to allow the same binary code to work +efficiently across a variety of hardware implementations varying in +physical vector storage capacity and datapath parallelism. + +\begin{commentary} +The vector extension is based on the style of vector register +architecture introduced by Seymour Cray in the 1970s, as opposed to +the earlier packed SIMD approach, introduced with the Lincoln Labs +TX-2 in 1957 and now adopted by most other commercial instruction +sets. + +The vector instruction set contains many features developed in earlier +research projects, including the Berkeley T0 and VIRAM vector +microprocessors, the MIT Scale vector-thread processor, and the +Berkeley Maven and Hwacha projects. +\end{commentary} + +\section{Vector Unit State} + +The additional vector unit architectural state consists of 32 vector +data registers ({\tt v0}--{\tt v31}), 8 vector predicate registers +({\tt vp0}--{\tt vp7}), and an XLEN-bit WARL vector length CSR, {\tt + vl}. 
In addition, the current configuration of the vector unit is +held in a set of vector configuration CSRs ({\tt vcmaxw}, {\tt vctype}, +{\tt vcnpred}), as described below. The implementation determines an +available {\em maximum vector length} (MVL) for the current +configuration held in the {\tt vcmaxw} and {\tt vcnpred} registers. +There is also a 3-bit fixed-point rounding mode CSR {\tt vxrm}, and a +single-bit fixed-point saturation status CSR {\tt vxsat}. + +\begin{table} + \centering + \begin{tabular}{|l|c|l|} + \hline + CSR name & Number & Base ISA \\ + \hline + {\tt vl} & 0x020 & RV32, RV64, RV128 \\ + {\tt vxrm} & 0x020 & RV32, RV64, RV128 \\ + {\tt vxsat} & 0x020 & RV32, RV64, RV128 \\ + {\tt vcsr} & 0x020 & RV32, RV64, RV128 \\ + \hline + {\tt vcnpred} & 0x020 & RV32, RV64, RV128 \\ + \hline + {\tt vcmaxw} & 0x020 & RV32, RV64, RV128 \\ + {\tt vcmaxw1} & 0x020 & RV32 \\ + {\tt vcmaxw2} & 0x020 & RV32, RV64 \\ + {\tt vcmaxw3} & 0x020 & RV32 \\ + \hline + {\tt vctype} & 0x020 & RV32, RV64, RV128 \\ + {\tt vctype1} & 0x020 & RV32 \\ + {\tt vctype2} & 0x020 & RV32, RV64 \\ + {\tt vctype3} & 0x020 & RV32 \\ + \hline + {\tt vctypev0} & 0x020 & RV32, RV64, RV128 \\ + {\tt vctypev1} & 0x020 & RV32, RV64, RV128 \\ + ... \\ + {\tt vctypev31} & 0x020 & RV32, RV64, RV128 \\ + \hline + \end{tabular} + \caption{Vector extension CSRs.} + \label{tab:vcsrs} +\end{table} + +\section{Element Datatypes and Width} + +The datatypes and operations supported by the V extension depend upon +the base scalar ISA and supported extensions, and may include 8-bit, +16-bit, 32-bit, 64-bit, and 128-bit integer and fixed-point data types +(X8, X16, X32, X64, and X128 respectively), and 16-bit, 32-bit, +64-bit, and 128-bit floating-point types (F16, F32, F64, and F128 +respectively). When the V extension is added, it must support the +vector data element types implied by the supported scalar types as +defined by Table~\ref{tab:velemtypes}. 
The largest element width +supported is: +\[ \mbox{\em ELEN} = \max(\mbox{\em XLEN}, \mbox{\em FLEN}) \] + +\begin{commentary} + Compiler support for vectorization is greatly simplified when any + hardware-supported data types are supported by both scalar and + vector instructions. +\end{commentary} + +\begin{table} + \centering +\begin{tabular}{|l|l|} + \hline + \multicolumn{2}{|c|}{Supported Fixed-Point Widths} \\ + \hline + RV32I & X8, X16, X32 \\ + RV64I & X8, X16, X32, X64 \\ + RV128I & X8, X16, X32, X64, X128 \\ + \hline + \hline + \multicolumn{2}{|c|}{Supported Floating-Point Widths} \\ + \hline + F & F16, F32 \\ + FD & F16, F32, F64 \\ + FDQ & F16, F32, F64, F128 \\ + \hline +\end{tabular} +\caption{Supported data element widths depending on base integer ISA + and supported floating-point extensions. Note that supporting a + given floating-point width mandates support for all narrower + floating-point widths.} +\label{tab:velemtypes} +\end{table} + +Adding the vector extension to any machine with floating-point support +adds support for the IEEE standard half-precision 16-bit +floating-point data type. This includes a set of scalar +half-precision instructions described in +Section~\ref{sec:scalarhalffloat}. The scalar half-precision +instructions follow the template for other floating-point precisions, +but using the hitherto unused {\em fmt} field encoding of {\tt 10}. + +\begin{commentary} + We only support scalar half-precision floating-point types as part + of the vector extension, as the main benefits of half-precision are + obtained when using vector instructions that amortize per-operation + control overhead. Not supporting a separate scalar half-precision + floating-point extension also reduces the number of standard + instruction-set variants. +\end{commentary} + +\section{Vector Configuration Registers ({\tt vcmaxw}, {\tt + vctype}, {\tt vcnpred})} + +The vector unit must be configured before use. 
Each architectural +vector data register ({\tt v0}--{\tt v31}) is configured with the +maximum number of bits allowed in each element of that vector data +register, or can be disabled to free physical vector storage for other +architectural vector data registers. The number of available +vector predicate registers can also be set independently. + +The available MVL depends on the configuration setting, but MVL must +always have the same value for the same configuration parameters on a +given implementation. Implementations must provide an MVL of at least +four elements for all supported configuration settings. + +Each vector data register's current maximum width is held in a +separate four-bit field in the {\tt vcmaxw} CSRs, encoded as shown in +Table~\ref{tab:vcmaxw}. + +\begin{table}[hbt] + \centering + \begin{tabular}{|r|c|} + \hline + Width & Encoding \\ + \hline + Disabled & 0000 \\ + 8 & 1000 \\ + 16 & 1001 \\ + 32 & 1010 \\ + 64 & 1011 \\ + 128 & 1100 \\ + \hline + \end{tabular} + \caption{Encoding of {\tt vcmaxw} fields. All other values are + reserved.} + \label{tab:vcmaxw} +\end{table} + +\begin{commentary} + Several earlier vector machines had the ability to configure + physical vector register storage into a larger number of short + vectors or a smaller number of long vectors, in particular the + Fujitsu VP series~\cite{vp200}. +\end{commentary} + +In addition, each vector data register has an associated dynamic type +field that is held in a four-bit field in the {\tt vctype} CSRs, +encoded as shown in Table~\ref{tab:vctype}. The dynamic type field of +a vector data register is constrained to only hold types that have +equal or lesser width than the value in the corresponding {\tt vcmaxw} +field for that vector data register. Changes to {\tt vctype} do not +alter MVL. 
+ +\begin{table}[hbt] + \centering + \begin{tabular}{|l|c|c|} + \hline + Type & {\tt vctype} encoding & {\tt vcmaxw} equivalent\\ + \hline + Disabled & 0000 & 0000 \\ + F16 & 0001 & 1001 \\ + F32 & 0010 & 1010 \\ + F64 & 0011 & 1011 \\ + F128 & 0100 & 1100 \\ + X8 & 1000 & 1000 \\ + X16 & 1001 & 1001 \\ + X32 & 1010 & 1010 \\ + X64 & 1011 & 1011 \\ + X128 & 1100 & 1100 \\ + \hline + \end{tabular} + \caption{Encoding of {\tt vctype} fields. The third column shows the + value that will be saved when writing to {\tt vcmaxw} fields. All + other values are reserved.} + \label{tab:vctype} +\end{table} + +\begin{commentary} + Vector data registers have both a maximum element width and a + current element data type to support vector function calls, where + the caller does not know the types needed by the callee, as + described below. +\end{commentary} + +To reduce configuration time, writes to a {\tt vcmaxw} field also +write the corresponding {\tt vctype} field. The {\tt vcmaxw} field +can be written any value taken from the type encoding in +Table~\ref{tab:vctype}, but only the width information as shown in +Table~\ref{tab:vcmaxw} will be recorded in the {\tt vcmaxw} fields +whereas the full type information will be recorded in the +corresponding {\tt vctype} field. + +Attempting to write any {\tt vcmaxw} field with a width larger than +that supported by the implementation will raise an illegal instruction +exception. Implementations are allowed to record a {\tt vcmaxw} value +larger than the value requested. In particular, an implementation may +choose to hardwire {\tt vcmaxw} fields to the largest supported width. + +Attempting to write an unsupported type or a type that requires more +than the current {\tt vcmaxw} width to a {\tt vctype} field will raise +an exception. 
+ +Any write to a field in the {\tt vcmaxw} register configures the +vector unit and causes all vector data registers to be zeroed and all +vector predicate registers to be set, and the vector length register +{\tt vl} to be set to the maximum supported vector length. + +Any write to a {\tt vctype} field zeros only the associated vector +data register, leaving the other vector unit state undisturbed. +Attempting to write a type needing more bits than the corresponding +{\tt vcmaxw} value to a {\tt vctype} field will raise an illegal +instruction exception. + +\begin{commentary} + Vector registers are zeroed on reconfiguration to prevent security + holes and to avoid exposing differences between how different + implementations manage physical vector register storage. + + In-order implementations will probably use a flag bit per register to + mux in 0 instead of garbage values on each source until it is + overwritten. For in-order machines, partial writes due to + predication or vector lengths less than MVL complicate this zeroing, + but these cases can be handled by adopting a hardware + read-modify-write, adding a zero bit per element, or a trap to a + machine-mode trap handler if the first write access after configuration + is partial. Out-of-order machines can just point the initial rename + table at a physical zero register. +\end{commentary} + +%% Can support larger number of architectural vector registers with +%% future extensions. + +In RV128, {\tt vcmaxw} is a single CSR holding 32 4-bit width +fields. Bits $(4N+3)$--$(4N)$ hold the maximum width of vector data +register $N$. In RV64, the {\tt vcmaxw2} CSR provides access to the +upper 64 bits of {\tt vcmaxw}. In RV32, the {\tt vcmaxw1} CSR +provides access to bits 63--32 of {\tt vcmaxw}, while the {\tt vcmaxw3} CSR +provides access to bits 127--96. + +The {\tt vcnpred} CSR contains a single 4-bit WLRL field giving the +number of enabled architectural predicate registers, between 0 and 8. 
+Any write to {\tt vcnpred} zeros all vector data registers, sets all +bits in visible vector predicate registers, and sets the vector length +register {\tt vl} to the maximum supported vector length. Attempting +to write a value larger than 8 to {\tt vcnpred} raises an illegal +instruction exception. + +\section{Vector Length} + +The active vector length is held in the XLEN-bit WARL vector length +CSR {\tt vl}, which can only hold values between 0 and MVL inclusive. +Any writes to the maximum configuration registers ({\tt vcmaxw} or +{\tt vcnpred}) cause {\tt vl} to be initialized with MVL. Writes to +{\tt vctype} do not affect {\tt vl}. + +The active vector length is usually written with the {\tt setvl} +instruction, which is encoded as a {\tt csrrw} instruction to the {\tt + vl} CSR number. The source argument to the {\tt csrrw} is the +requested application vector length (AVL) as an unsigned XLEN-bit +integer. The {\tt setvl} instruction calculates the value to assign to +{\tt vl} according to Table~\ref{tab:vlcalc}. + +\begin{table} + \centering + \begin{tabular}{|c|c|} + \hline + AVL Value & {\tt vl} setting \\ + \hline + AVL $\geq$ 2\,MVL & MVL \\ + 2\,MVL $>$ AVL $>$ MVL & $\lfloor$AVL$/2\rfloor$ \\ + MVL $\geq$ AVL & AVL \\ + \hline + \end{tabular} + \caption{Operation of {\tt setvl} instruction to set vector + length register {\tt vl} based on requested application vector + length (AVL) and current maximum vector length (MVL).} + \label{tab:vlcalc} +\end{table} + +\begin{commentary} + The rules for setting the {\tt vl} register help keep vector + pipelines full over the last two iterations of a stripmined loop. + Similar rules were previously used in Cray-designed machines~\cite{crayx1asm}. +\end{commentary} + +The {\tt vl} register is updated with the value calculated according to +Table~\ref{tab:vlcalc}, and this value is also returned as the result of the {\tt setvl} +instruction. 
Note that unlike a regular {\tt csrrw} instruction, the +value returned is not the original CSR value but the modified value. + +\begin{commentary} + The idea of having implementation-defined vector length dates back + to at least the IBM 3090 Vector Facility~\cite{ibm370varch}, which + used a special ``Load Vector Count and Update'' (VLVCU) instruction + to control stripmine loops. The {\tt setvl} instruction included + here is based on the simpler {\tt setvlr} instruction introduced by + Asanovi\'{c}~\cite{krstephd}. +\end{commentary} + +The {\tt setvl} instruction is typically used at the start of every +iteration of a stripmined loop to set the number of vector elements to +operate on in the following loop iteration. The current MVL can be +obtained by performing a {\tt setvl} with a source argument that has +all bits set (largest unsigned integer). + +No element operations are performed for any vector instruction when +{\tt vl}=0. + +\begin{figure}[bt] + \centering +\begin{verbatim} + # Vector-vector 32-bit add loop. + # Assume vector unit configured with correct types. + # a0 holds N + # a1 holds pointer to result vector + # a2 holds pointer to first source vector + # a3 holds pointer to second source vector. + loop: setvl t0, a0 + vld v0, a2 # Load first vector + sll t1, t0, 2 # multiply by bytes + add a2, t1 # Bump pointer + vld v1, a3 # Load second vector + add a3, t1 # Bump pointer + vadd v0, v1 # Add elements + sub a0, t0 # Decrement elements completed + vst v0, a1 # Store result vector + add a1, t1 # Bump pointer + bnez a0, loop # Any more? +\end{verbatim} +\caption{Example vector-vector add loop.} +\label{fig:vvadd} +\end{figure} + +\section{Rapid Configuration Instructions} + +It can take several instructions to set {\tt vcmaxw}, {\tt vctype} and +{\tt vcnpred} to a given configuration. 
To accelerate configuring the +vector unit, specialized {\tt vcfg} instructions are added that are +encoded as writes to CSRs with encoded immediate values that set +multiple fields in the {\tt vcmaxw}, {\tt vctype}, and {\tt vcnpred} +configuration registers. + +The {\tt vcfgd} instruction is encoded as a CSRRW that takes a +register value encoded as shown in Figure~\ref{fig:vdcfg}, and which +returns the corresponding MVL in the destination register. A +corresponding {\tt vcfgdi} instruction is encoded as a CSRRWI that +takes a 5-bit immediate value to set the configuration, and returns +MVL in the destination register. + +\begin{commentary} + One of the primary uses of {\tt vcfgdi} is to configure the vector + unit with single-byte element vectors for use in {\tt memcpy} and + {\tt memset} routines. A single instruction can configure the + vector unit for these operations. +\end{commentary} + +The {\tt vcfgd} instruction also clears the {\tt vcnpred} register, so +no predicate registers are allocated. 
+ +\begin{figure}[hbt] + \centering + \begin{tabular}{p{2cm}p{2cm}ccc|c|c|c|c|c|c|c|l} + \cline{6-12} + & & & & & 0 & F64 & F32 & F16 & X32 & X16 & X8 & RV32 \\ + \cline{6-12} + \multicolumn{1}{c}{} & + \multicolumn{1}{c}{} & + \multicolumn{1}{c}{} & + \multicolumn{1}{c}{} & + \multicolumn{1}{c}{} & + \multicolumn{1}{c}{2} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & \\ + \cline{2-12} + & \multicolumn{2}{|c|}{0} & \multicolumn{1}{c|}{F128} & \multicolumn{2}{c|}{X64} & F64 & F32 & F16 & X32 & X16 & X8 & RV64 \\ + \cline{2-12} + \multicolumn{1}{c}{} & + \multicolumn{2}{c}{24} & + \multicolumn{1}{c}{5} & + \multicolumn{2}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & \\ + \cline{1-12} + \multicolumn{2}{|c|}{0} & \multicolumn{1}{c|}{X128} & + \multicolumn{1}{c|}{F128} & \multicolumn{2}{c|}{X64} & F64 & F32 & F16 & X32 & X16 & X8 & RV128 \\ + \cline{1-12} + \multicolumn{2}{c}{83} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{2}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & + \multicolumn{1}{c}{5} & \\ + \end{tabular} + \caption{Format of the {\tt vcfgd} value for different base ISAs, + holding 5-bit vector register numbers for each supported + type. Fields must either contain 0 indicating no vector registers + are allocated for that type, or a vector register number greater + than all fields to the right. All vector register numbers in between two + non-zero fields are allocated to the type with the higher vector + register number.} + \label{fig:vdcfg} +\end{figure} + +The {\tt vcfgd} value specifies how many vector registers of each +datatype are allocated, and is divided into 5-bit fields, one per +supported datatype. 
A value of 0 in a field indicates that no +registers of that type are allocated. A non-zero value indicates the +highest vector register number allocated to that type. + +Each 5-bit field in the {\tt vcfgd} value must contain either zero, +indicating that no vector registers are allocated for that type, or a +vector register number greater than all fields in lower bit positions, +indicating the highest vector register containing the associated type. +This encoding can compactly represent any arbitrary allocation of +vector registers to data types, except that there must be at least two +vector registers ({\tt v0} and {\tt v1}) allocated to the narrowest +required type. An example allocation is shown in +Figure~\ref{fig:vcfgdexample}. + +\begin{figure} + \centering + \begin{tabular}{|c|c|c|c|c|c|c|} + \hline + 0 & F64 & F32 & F16 & X32 & X16 & X8 \\ + \hline + \hline + 0 & 18 & 12 & 0 & 1 & 0 & 0 \\ + \hline + \end{tabular} + \\ + \vspace{0.1in} + \begin{tabular}{|c|c|c|c|} + \hline + Vector registers & {\tt vcmaxw} & {\tt vctype} & Type \\ + \hline + {\tt v31}--{\tt v19} & \tt 0000 & \tt 0000 & Disabled \\ + {\tt v18}--{\tt v13} & \tt 1011 & \tt 0011 & F64 \\ + {\tt v12}--{\tt v2} & \tt 1010 & \tt 0010 & F32 \\ + {\tt v1}--{\tt v0} & \tt 1010 & \tt 1010 & X32 \\ + \hline + \end{tabular} + \caption{Example use of {\tt vcfgd} value to set configuration.} + \label{fig:vcfgdexample} +\end{figure} + +Separate {\tt vcfgp} and {\tt vcfgpi} instructions are provided, using +the CSRRW and CSRRWI encodings respectively, that write the source +value to the {\tt vcnpred} register and return the new MVL. These +writes also clear the vector data registers, set all bits in the +allocated predicate registers, and set {\tt vl}=MVL. A {\tt vcfgp} or +{\tt vcfgpi} instruction can be used after a {\tt vcfgd} to complete a +reconfiguration of the vector unit. + +If a zero argument is given to {\tt vcfgd}, the vector unit will be +unconfigured with no enabled registers, and the value 0 will be +returned for MVL. 
Only the configuration registers {\tt vcmaxw} and +{\tt vcnpred} can be accessed in this state, either directly or via +{\tt vcfgd}, {\tt vcfgdi}, {\tt vcfgp}, or {\tt vcfgpi} +instructions. Other vector instructions will raise an illegal +instruction exception. + +To quickly change the individual types of a vector register, each +vector data register $n$ has a dedicated CSR address to access its +{\tt vctype} field, named {\tt vctypev}$n$. The {\tt vcfgt} and {\tt + vcfgti} instructions are assembler pseudo-instructions for regular +CSRRW and CSRRWI instructions that update the type fields and return +the original value. The {\tt vcfgti} instruction is typically used to +change to a desired type while recording the previous type in one +instruction, and the {\tt vcfgt} instruction is used to revert back to +the saved type. + + + +%% # integer vector-scalar/scalar-vector operations use low-order bits of +%% # scalar operand. + +%% 3130 9 8 7 6 5 4 3 2 120 9 8 7 6 5 4 3 2 110 9 8 7 6 5 4 3 2 1 0 +%% +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +%% | func7 | rs2 | rs1 |func3| rd | opcode |1 1| +%% +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +%% +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +%% | rs3 |fn2| rs2 | rs1 |func3| rd | opcode |1 1| +%% +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +%% Uses two reserved opcodes in 32-bit space: +%% VOP = 10101 11 +%% VMEM = 11101 11 + +%% VOP + +%% func3 (xs2, xs1, xf) + +%% mostly encodes which scalar values are accessed as with ROCC. 
+ +%% xs2 - 1 = reads scalar rs2 +%% xs1 - 1 = reads scalar rs1 +%% xf - 0=integer/1=float applies to both + + +%% Integer arithmetic + +%% 3130 9 8 7 6 5 4 3 2 120 9 8 7 6 5 4 3 2 110 9 8 7 6 5 4 3 2 1 0 +%% +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +%% | func7 | rs2 | rs1 |func3| rd | opcode |1 1| +%% +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +%% # Integer add/sub +%% # sign-extend smaller source +%% # wraparound overflow into destination +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 vd 1 0 1 0 1 1 1 vadd.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vadd.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 vd 1 0 1 0 1 1 1 vsub.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vsub.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vsub.sv vd, rs1, vs2 +%% # zero-extend smaller source +%% # wraparound overflow into destination +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 vd 1 0 1 0 1 1 1 vaddu.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vaddu.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 vd 1 0 1 0 1 1 1 vsubu.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vsubu.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vsubu.sv vd, rs1, vs2 +%% # Shifts use low bits of vsrc2, enough for src1 width +%% # srl/a fills in zero/sign bits in destination +%% # wraparound overflow into destination +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 vd 1 0 1 0 1 1 1 vsll.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vsll.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vsll.sv vd, rs1, vs2 (less useful) +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 vd 1 0 1 0 1 1 1 vsrl.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vsrl.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vsrl.sv vd, rs1, vs2 (table lookup) +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 vd 1 0 1 0 1 1 1 vsra.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 
vsra.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vsra.vs vd, rs1, vs2 (less useful) +%% # Logical ops +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 vd 1 0 1 0 1 1 1 vand.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vand.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 vd 1 0 1 0 1 1 1 vor.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vor.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 vd 1 0 1 0 1 1 1 vxor.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 0 1 0 1 1 1 vxor.vs vd, vs2, rs1 +%% # Predicate setting (only pd = 0-7 valid) +%% # smaller source is sign-extended +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 pd 1 0 1 0 1 1 1 vpeq.vv pd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 pd 1 0 1 0 1 1 1 vpeq.vs pd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 pd 1 0 1 0 1 1 1 vpne.vv pd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 pd 1 0 1 0 1 1 1 vpne.vs pd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 pd 1 0 1 0 1 1 1 vplt.vv pd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 pd 1 0 1 0 1 1 1 vplt.vs pd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 pd 1 0 1 0 1 1 1 vplt.sv pd, rs1, vs2 +%% # smaller source is zero-extended (not sure if needed for eq/neq) +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 pd 1 0 1 0 1 1 1 vpequ.vv pd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 pd 1 0 1 0 1 1 1 vpequ.vs pd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 pd 1 0 1 0 1 1 1 vpneu.vv pd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 pd 1 0 1 0 1 1 1 vpneu.vs pd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 0 0 pd 1 0 1 0 1 1 1 vpltu.vv pd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 pd 1 0 1 0 1 1 1 vpltu.vs pd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 pd 1 0 1 0 1 1 1 vpltu.sv pd, rs1, vs2 + + +%% # Multiply/Divide +%% vvmul vdest, vsrc1, vsrc2 # Signed multiply +%% vsmul vdest, vsrc1, xsrc2 # Signed multiply + +%% vvmulh vdest, vsrc1, vsrc2 # Signed multiply +%% vsmulh vdest, vsrc1, xsrc2 # Signed multiply + +%% vvmulu vdest, vsrc1, vsrc2 # Unsigned multiply +%% vsmulu vdest, vsrc1, xsrc2 # Unsigned 
multiply + +%% vvmulhu vdest, vsrc1, vsrc2 # Unsigned multiply +%% vsmulhu vdest, vsrc1, xsrc2 # Unsigned multiply + +%% vvmulsu vdest, vsrc1, vsrc2 # Signed-unsigned multiply +%% vsmulsu vdest, vsrc1, xsrc2 # Signed-unsigned multiply +%% svmulsu vdest, xsrc1, vsrc2 # Signed-unsigned multiply +%% vvmulhsu vdest, vsrc1, vsrc2 # Signed-unsigned multiply +%% vsmulhsu vdest, vsrc1, xsrc2 # Signed-unsigned multiply +%% svmulhsu vdest, xsrc1, vsrc2 # Signed-unsigned multiply + +%% vvdiv vdest, vsrc1, vsrc2 +%% vsdiv vdest, vsrc1, xsrc2 +%% svdiv vdest, xsrc1, vsrc2 + +%% vvdivu vdest, vsrc1, vsrc2 +%% vsdivu vdest, vsrc1, xsrc2 +%% svdivu vdest, xsrc1, vsrc2 + +%% vvrem vdest, vsrc1, vsrc2 +%% vsrem vdest, vsrc1, xsrc2 +%% svrem vdest, xsrc1, vsrc2 + +%% vvremu vdest, vsrc1, vsrc2 +%% vsremu vdest, vsrc1, xsrc2 +%% svremu vdest, xsrc1, vsrc2 + +%% # Load/store, size/type given by destination register configuration + +%% 3130 9 8 7 6 5 4 3 2 120 9 8 7 6 5 4 3 2 110 9 8 7 6 5 4 3 2 1 0 +%% +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +%% | func7 | rs2 | rs1 |func3| rd | opcode |1 1| +%% +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +%% # Unit-stride +%% # option to add post-increment to vld/vst using rs2 a la t0, but can do same with fusion +%% 0 0 0 0 0 0 0 0 0 0 0 0 rs1 0 1 0 vd 1 1 1 0 1 1 1 vld vd, rs1 +%% 0 0 0 0 0 0 0 0 0 0 0 0 rs1 0 1 0 vd 1 1 1 0 1 1 1 vst vd, rs1 +%% # Constant-stride +%% # Can add segments with immediate field in func7 +%% 0 0 0 0 0 0 0 rs2 rs1 1 1 0 vd 1 1 1 0 1 1 1 vlds vd, rs1, rs2 +%% 0 0 0 0 0 0 0 rs2 rs1 1 1 0 vd 1 1 1 0 1 1 1 vsts vd, rs1, rs2 +%% # Indexed (scatter/gather) +%% # Scalar base + vector offsets +%% # Can add segments with immediate field in func7 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 1 1 0 1 1 1 vldx vd, rs1, vs2 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 1 1 0 1 1 1 vstx vd, rs1, vs2 +%% # If A extension present: +%% # Vector atomics use vector base address +%% # t = M[vs2]; M[vs2] = t op 
vs1; vd = t +%% # must be matching integer 32b (W) or 64b (D) types in vs1 and vd +%% 0 0 0 0 0 0 0 vs2 vs1 0 1 0 vd 1 1 1 0 1 1 1 vamoswap.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 1 1 0 1 1 1 vamoswap.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 1 0 vd 1 1 1 0 1 1 1 vamoadd.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 1 1 0 1 1 1 vamoadd.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 1 0 vd 1 1 1 0 1 1 1 vamoand.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 1 1 0 1 1 1 vamoand.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 1 0 vd 1 1 1 0 1 1 1 vamoor.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 1 1 0 1 1 1 vamoor.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 1 0 vd 1 1 1 0 1 1 1 vamoxor.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 1 1 0 1 1 1 vamoxor.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 1 0 vd 1 1 1 0 1 1 1 vamoxor.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 1 1 0 1 1 1 vamoxor.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 1 0 vd 1 1 1 0 1 1 1 vamomax.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 1 1 0 1 1 1 vamomax.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 1 0 vd 1 1 1 0 1 1 1 vamomaxu.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 1 1 0 1 1 1 vamomaxu.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 1 0 vd 1 1 1 0 1 1 1 vamomin.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 1 1 0 1 1 1 vamomin.vs vd, vs2, rs1 +%% 0 0 0 0 0 0 0 vs2 vs1 0 1 0 vd 1 1 1 0 1 1 1 vamominu.vv vd, vs2, vs1 +%% 0 0 0 0 0 0 0 vs2 rs1 0 1 0 vd 1 1 1 0 1 1 1 vamominu.vs vd, vs2, rs1 + + +%% # Memory speculative options. If permission fault (not just page +%% # fault), then set sticky bit in predicate register vp1 rather than dying. 
+ + + + +%% # Example Code +%% ---------------------------------------------------------------------- + +%% memset(a0=dest, a1=c, a2=len) +%% csrwi vdcfg, 1 # One vector register of 8b +%% mv t1, a0 # Copy dest +%% beqz a1, loop # Skip scalar move if a1=0 (could drop this instruction) +%% setvl t0, a2 # Set/find vector length +%% vmv.vs v0, a1 # Copy scalar a1 to elements of v0 +%% loop: setvl t0, a2 # Set/find vector length +%% vst t1, v0 # Set memory +%% sub a2, t0 # Decrement count +%% add t1, t0 # Bump pointer +%% bnez a2, loop # Any more? +%% done: vuncfg +%% j ra + + +%% # With ai +%% memset(a0=dest, a1=c, a2=len) +%% csrwi vdcfg, 1 # One vector register of 8b +%% mv t1, a0 # Copy dest +%% beqz a1, loop # Skip scalar move if a1=0 (could drop this instruction) +%% setvl t0, a2 # Set/find vector length +%% vmv.vs v0, a1 # Copy scalar a1 to elements of v0 +%% loop: setvl t0, a2 # Set/find vector length +%% vstai t1, v0, t0 # Set memory +%% sub a2, t0 # Decrement count +%% bnez a2, loop # Any more? +%% done: vuncfg +%% j ra + +%% ---------------------------------------------------------------------- + +%% memcpy(a0=dest, a1=src, a2=len) +%% csrwi vdcfg, 1 # One vector register of 8b +%% mv t2, a0 # Copy dest +%% loop: setvl t0, a2 # Set/find vector length +%% vld v0, a1 # Load vector +%% add a1, t0 # Bump pointer (can fuse with vld) +%% sub a2, t0 # Decrement count +%% vst t2, a0 # Store vector +%% add t2, t0 # Bump pointer (can fuse with vst) +%% bnez a2, loop # Any more? +%% done: vuncfg +%% j ra + +%% # with ldai/stai +%% memcpy(a0=dest, a1=src, a2=len) +%% csrwi vdcfg, 1 # One vector register of 8b +%% mv t2, a0 # Copy dest +%% loop: setvl t0, a2 # Set/find vector length +%% vldai v0, a1, t0 # Load vector +%% sub a2, t0 # Decrement count +%% vstai t2, a0, t0 # Store vector +%% bnez a2, loop # Any more? +%% done: vuncfg +%% j ra + + + -- cgit v1.1