aboutsummaryrefslogtreecommitdiff
path: root/src/f.tex
diff options
context:
space:
mode:
Diffstat (limited to 'src/f.tex')
-rw-r--r--src/f.tex742
1 files changed, 742 insertions, 0 deletions
diff --git a/src/f.tex b/src/f.tex
new file mode 100644
index 0000000..94c0920
--- /dev/null
+++ b/src/f.tex
@@ -0,0 +1,742 @@
+\chapter{``F'' Standard Extension for Single-Precision Floating-Point,
+Version 2.0}
+\label{sec:single-float}
+
+This chapter describes the standard instruction-set extension for
+single-precision floating-point, which is named ``F'' and adds
+single-precision floating-point computational instructions compliant
+with the IEEE 754-2008 arithmetic standard~\cite{ieee754-2008}.
+
+
+\section{F Register State}
+
+The F extension adds 32 floating-point registers, {\tt f0}--{\tt f31},
+each 32 bits wide, and a floating-point control and status register
+{\tt fcsr}, which contains the operating mode and exception status of the
+floating-point unit. This additional state is shown in
+Figure~\ref{fprs}. We use the term FLEN to describe the width of the
+floating-point registers in the RISC-V ISA, and FLEN=32 for the F
+single-precision floating-point extension. Most floating-point
+instructions operate on values in the floating-point register file.
+Floating-point load and store instructions transfer floating-point
+values between registers and memory. Instructions to transfer values
+to and from the integer register file are also provided.
+
+\begin{figure}[htbp]
+{\footnotesize
+\begin{center}
+\begin{tabular}{p{2in}}
+\instbitrange{FLEN-1}{0} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f0\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f1\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f2\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f3\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f4\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f5\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f6\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f7\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f8\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f9\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f10\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f11\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f12\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f13\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f14\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f15\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f16\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f17\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f18\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f19\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f20\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f21\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f22\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f23\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f24\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f25\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f26\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f27\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f28\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f29\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f30\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f31\ \ \ \ \ }} \\ \cline{1-1}
+\multicolumn{1}{c}{FLEN} \\
+
+\instbitrange{31}{0} \\ \cline{1-1}
+\multicolumn{1}{|c|}{\reglabel{fcsr}} \\ \cline{1-1}
+\multicolumn{1}{c}{32} \\
+\end{tabular}
+\end{center}
+}
+\caption{RISC-V standard F extension single-precision floating-point state.}
+\label{fprs}
+\end{figure}
+
+\begin{commentary}
+We considered a unified register file for both integer and
+floating-point values as this simplifies software register allocation
+and calling conventions, and reduces total user state. However, a
+split organization increases the total number of registers accessible
+with a given instruction width, simplifies provision of enough regfile
+ports for wide superscalar issue, supports decoupled
+floating-point-unit architectures, and simplifies use of internal
+floating-point encoding techniques. Compiler support and calling
+conventions for split register file architectures are well understood,
+and using dirty bits on floating-point register file state can reduce
+context-switch overhead.
+\end{commentary}
+
+\clearpage
+
+\section{Floating-Point Control and Status Register}
+
+The floating-point control and status register, {\tt fcsr}, is a RISC-V
+control and status register (CSR). It is a 32-bit read/write register that
+selects the dynamic rounding mode for floating-point arithmetic operations and
+holds the accrued exception flags, as shown in Figure~\ref{fcsr}.
+
+\begin{figure*}
+{\footnotesize
+\begin{center}
+\begin{tabular}{K@{}E@{}ccccc}
+\instbitrange{31}{8} &
+\instbitrange{7}{5} &
+\instbit{4} &
+\instbit{3} &
+\instbit{2} &
+\instbit{1} &
+\instbit{0} \\
+\hline
+\multicolumn{1}{|c|}{0} &
+\multicolumn{1}{c|}{Rounding Mode ({\tt frm})} &
+\multicolumn{5}{c|}{Accrued Exceptions ({\tt fflags})} \\
+\hline
+\multicolumn{1}{c}{} &
+\multicolumn{1}{c|}{} &
+\multicolumn{1}{c|}{NV} &
+\multicolumn{1}{c|}{DZ} &
+\multicolumn{1}{c|}{OF} &
+\multicolumn{1}{c|}{UF} &
+\multicolumn{1}{c|}{NX} \\
+\cline{3-7}
+24 & 3 & 1 & 1 & 1 & 1 & 1 \\
+\end{tabular}
+\end{center}
+}
+\vspace{-0.1in}
+\caption{Floating-point control and status register.}
+\label{fcsr}
+\end{figure*}
+
+The {\tt fcsr} register can be read and written with the FRCSR and
+FSCSR instructions, which are assembler pseudo-ops built on the
+underlying CSR access instructions. FRCSR reads {\tt fcsr} by copying
+it into integer register {\em rd}. FSCSR swaps the value in {\tt
+ fcsr} by copying the original value into integer register {\em rd},
+and then writing a new value obtained from integer register {\em rs1}
+into {\tt fcsr}.
+
+The fields within the {\tt fcsr} can also be accessed individually
+through different CSR addresses, and separate assembler pseudo-ops are
+defined for these accesses. The FRRM instruction reads the Rounding
+Mode field {\tt frm} and copies it into the least-significant three
+bits of integer register {\em rd}, with zero in all other bits. FSRM
+swaps the value in {\tt frm} by copying the original value into
+integer register {\em rd}, and then writing a new value obtained from
+the three least-significant bits of integer register {\em rs1} into
+{\tt frm}. FRFLAGS and FSFLAGS are defined analogously for the
+Accrued Exception Flags field {\tt fflags}. Additional
+pseudo-instructions FSRMI and FSFLAGSI swap values using an immediate
+value instead of register {\em rs1}.
+
+Floating-point operations use either a static rounding mode encoded in the
+instruction, or a dynamic rounding mode held in {\tt frm}. Rounding modes are
+encoded as shown in Table~\ref{rm}. A value of 111 in the instruction's {\em
+rm} field selects the dynamic rounding mode held in {\tt frm}. If {\tt frm}
+is set to an invalid value (101--111), any subsequent attempt to execute
+a floating-point operation with a dynamic rounding mode will cause an illegal
+instruction trap. Some instructions that have the {\em rm} field are
+nevertheless unaffected by the rounding mode; they should have their {\em rm}
+field set to RNE (000).
+
+\begin{commentary}
+The C99 language standard effectively mandates the provision of a
+dynamic rounding mode register.
+\end{commentary}
+\newpage
+
+\begin{table}[htp]
+\begin{small}
+\begin{center}
+\begin{tabular}{ccl}
+\hline
+\multicolumn{1}{|c|}{Rounding Mode} &
+\multicolumn{1}{c|}{Mnemonic} &
+\multicolumn{1}{c|}{Meaning} \\
+\hline
+\multicolumn{1}{|c|}{000} &
+\multicolumn{1}{l|}{RNE} &
+\multicolumn{1}{l|}{Round to Nearest, ties to Even}\\
+\hline
+\multicolumn{1}{|c|}{001} &
+\multicolumn{1}{l|}{RTZ} &
+\multicolumn{1}{l|}{Round towards Zero}\\
+\hline
+\multicolumn{1}{|c|}{010} &
+\multicolumn{1}{l|}{RDN} &
+\multicolumn{1}{l|}{Round Down (towards $-\infty$)}\\
+\hline
+\multicolumn{1}{|c|}{011} &
+\multicolumn{1}{l|}{RUP} &
+\multicolumn{1}{l|}{Round Up (towards $+\infty$)}\\
+\hline
+\multicolumn{1}{|c|}{100} &
+\multicolumn{1}{l|}{RMM} &
+\multicolumn{1}{l|}{Round to Nearest, ties to Max Magnitude}\\
+\hline
+\multicolumn{1}{|c|}{101} &
+\multicolumn{1}{l|}{} &
+\multicolumn{1}{l|}{\em Invalid. Reserved for future use.}\\
+\hline
+\multicolumn{1}{|c|}{110} &
+\multicolumn{1}{l|}{} &
+\multicolumn{1}{l|}{\em Invalid. Reserved for future use.}\\
+\hline
+\multicolumn{1}{|c|}{111} &
+\multicolumn{1}{l|}{} &
+\multicolumn{1}{l|}{In instruction's {\em rm} field, selects dynamic rounding mode;}\\
+\multicolumn{1}{|c|}{} &
+\multicolumn{1}{l|}{} &
+\multicolumn{1}{l|}{In Rounding Mode register, {\em Invalid}.}\\
+\hline
+\end{tabular}
+\end{center}
+\end{small}
+\caption{Rounding mode encoding.}
+\label{rm}
+\end{table}
+
+The accrued exception flags indicate the exception conditions that
+have arisen on any floating-point arithmetic instruction since the
+field was last reset by software, as shown in Table~\ref{bitdef}.
+
+\begin{table}[htp]
+\begin{small}
+\begin{center}
+\begin{tabular}{cl}
+\hline
+\multicolumn{1}{|c|}{Flag Mnemonic} &
+\multicolumn{1}{c|}{Flag Meaning} \\
+\hline
+\multicolumn{1}{|c|}{NV} &
+\multicolumn{1}{c|}{Invalid Operation}\\
+\hline
+\multicolumn{1}{|c|}{DZ} &
+\multicolumn{1}{c|}{Divide by Zero}\\
+\hline
+\multicolumn{1}{|c|}{OF} &
+\multicolumn{1}{c|}{Overflow}\\
+\hline
+\multicolumn{1}{|c|}{UF} &
+\multicolumn{1}{c|}{Underflow}\\
+\hline
+\multicolumn{1}{|c|}{NX} &
+\multicolumn{1}{c|}{Inexact}\\
+\hline
+\end{tabular}
+\end{center}
+\end{small}
+\caption{Accrued exception flag encoding.}
+\label{bitdef}
+\end{table}
+
+\begin{commentary}
+As allowed by the standard, we do not support traps on floating-point
+exceptions in the base ISA, but instead require explicit checks of the flags
+in software. We considered adding branches controlled directly by the
+contents of the floating-point accrued exception flags, but ultimately chose
+to omit these instructions to keep the ISA simple.
+\end{commentary}
+
+\section{NaN Generation and Propagation}
+
+Except when otherwise stated, if the result of a floating-point operation is
+NaN, it is the canonical NaN. The canonical NaN has a positive sign and all
+significand bits clear except the MSB, a.k.a.\ the quiet bit. For
+single-precision floating-point, this corresponds to the pattern {\tt
+0x7fc00000}.
+
+For FMIN and FMAX, if at least one input is a signaling NaN, or if both inputs
+are quiet NaNs, the result is the canonical NaN. If one operand is a quiet NaN
+and the other is not a NaN, the result is the non-NaN operand.
+
+The sign-injection instructions (FSGNJ, FSGNJN, FSGNJX) do not canonicalize
+NaNs; they manipulate the underlying bit patterns directly.
+
+\begin{commentary}
+We considered propagating NaN payloads, as is recommended by the standard,
+but this decision would have increased hardware cost. Moreover, since this
+feature is optional in the standard, it cannot be used in portable code.
+
+Implementors are free to provide a NaN payload propagation scheme as
+a nonstandard extension enabled by a nonstandard operating mode. However, the
+canonical NaN scheme described above must always be supported and should be
+the default mode.
+\end{commentary}
+
+\begin{commentary}
+We require implementations to return the standard-mandated default
+values in the case of exceptional conditions, without any further
+intervention on the part of user-level software (unlike the Alpha ISA
+floating-point trap barriers). We believe full hardware handling of
+exceptional cases will become more common, and so wish to avoid
+complicating the user-level ISA to optimize other approaches.
+Implementations can always trap to machine-mode software handlers to
+provide exceptional default values.
+\end{commentary}
+
+\section{Subnormal Arithmetic}
+
+Operations on subnormal numbers are handled in accordance with the IEEE
+754-2008 standard.
+
+In the parlance of the IEEE standard, tininess is detected after
+rounding---that is, the underflow exception is raised only if the rounded
+result is subnormal, and not merely because the unrounded result would
+have been subnormal.
+
+\begin{commentary}
+Detecting tininess after rounding results in fewer spurious underflow signals.
+\end{commentary}
+
+\section{Single-Precision Load and Store Instructions}
+
+Floating-point loads and stores use the same base+offset addressing
+mode as the integer base ISA, with a base address in register {\em
+ rs1} and a 12-bit signed byte offset. The FLW instruction loads a
+single-precision floating-point value from memory into floating-point
+register {\em rd}. FSW stores a single-precision value from
+floating-point register {\em rs2} to memory.
+
+\vspace{-0.2in}
+\begin{center}
+\begin{tabular}{M@{}R@{}F@{}R@{}O}
+\\
+\instbitrange{31}{20} &
+\instbitrange{19}{15} &
+\instbitrange{14}{12} &
+\instbitrange{11}{7} &
+\instbitrange{6}{0} \\
+\hline
+\multicolumn{1}{|c|}{imm[11:0]} &
+\multicolumn{1}{c|}{rs1} &
+\multicolumn{1}{c|}{width} &
+\multicolumn{1}{c|}{rd} &
+\multicolumn{1}{c|}{opcode} \\
+\hline
+12 & 5 & 3 & 5 & 7 \\
+offset[11:0] & base & W & dest & LOAD-FP \\
+\end{tabular}
+\end{center}
+
+\vspace{-0.2in}
+\begin{center}
+\begin{tabular}{O@{}R@{}R@{}F@{}R@{}O}
+\\
+\instbitrange{31}{25} &
+\instbitrange{24}{20} &
+\instbitrange{19}{15} &
+\instbitrange{14}{12} &
+\instbitrange{11}{7} &
+\instbitrange{6}{0} \\
+\hline
+\multicolumn{1}{|c|}{imm[11:5]} &
+\multicolumn{1}{c|}{rs2} &
+\multicolumn{1}{c|}{rs1} &
+\multicolumn{1}{c|}{width} &
+\multicolumn{1}{c|}{imm[4:0]} &
+\multicolumn{1}{c|}{opcode} \\
+\hline
+7 & 5 & 5 & 3 & 5 & 7 \\
+offset[11:5] & src & base & W & offset[4:0] & STORE-FP \\
+\end{tabular}
+\end{center}
+
+FLW and FSW are only guaranteed to execute atomically if the effective address
+is naturally aligned.
+
+\section{Single-Precision Floating-Point Computational Instructions}
+\label{sec:single-float-compute}
+
+Floating-point arithmetic instructions with one or two source operands use the
+R-type format with the OP-FP major opcode. FADD.S, FSUB.S,
+FMUL.S, and FDIV.S perform single-precision floating-point addition,
+subtraction, multiplication, and division, respectively, between {\em rs1} and
+{\em rs2}, writing the result to {\em rd}. FMIN.S and FMAX.S
+write, respectively, the smaller or larger of {\em rs1} and {\em rs2} to {\em
+rd}. FSQRT.S computes the square root of {\em rs1} and writes the
+result to {\em rd}.
+
+The 2-bit floating-point format field {\em fmt} is encoded as shown in
+Table~\ref{tab:fmt}. It is set to {\em S} (00) for all instructions in
+the F extension.
+
+\begin{table}[htp]
+\begin{small}
+\begin{center}
+\begin{tabular}{|c|c|l|}
+\hline
+{\em fmt} field &
+Mnemonic &
+Meaning \\
+\hline
+00 & S & 32-bit single-precision \\
+01 & D & 64-bit double-precision \\
+10 & - & {\em reserved} \\
+11 & Q & 128-bit quad-precision \\
+\hline
+\end{tabular}
+\end{center}
+\end{small}
+\caption{Format field encoding.}
+\label{tab:fmt}
+\end{table}
+
+All floating-point operations that perform rounding can select the
+rounding mode using the {\em rm} field with the encoding shown in
+Table~\ref{rm}.
+
+\vspace{-0.2in}
+\begin{center}
+\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O}
+\\
+\instbitrange{31}{27} &
+\instbitrange{26}{25} &
+\instbitrange{24}{20} &
+\instbitrange{19}{15} &
+\instbitrange{14}{12} &
+\instbitrange{11}{7} &
+\instbitrange{6}{0} \\
+\hline
+\multicolumn{1}{|c|}{funct5} &
+\multicolumn{1}{c|}{fmt} &
+\multicolumn{1}{c|}{rs2} &
+\multicolumn{1}{c|}{rs1} &
+\multicolumn{1}{c|}{rm} &
+\multicolumn{1}{c|}{rd} &
+\multicolumn{1}{c|}{opcode} \\
+\hline
+5 & 2 & 5 & 5 & 3 & 5 & 7 \\
+FADD/FSUB & S & src2 & src1 & RM & dest & OP-FP \\
+FMUL/FDIV & S & src2 & src1 & RM & dest & OP-FP \\
+FMIN-MAX & S & src2 & src1 & MIN/MAX & dest & OP-FP \\
+FSQRT & S & 0 & src & RM & dest & OP-FP \\
+\end{tabular}
+\end{center}
+
+Floating-point fused multiply-add instructions require a new standard
+instruction format. R4-type instructions specify three source
+registers ({\em rs1}, {\em rs2}, and {\em rs3}) and a destination
+register ({\em rd}). This format is only used by the floating-point
+fused multiply-add instructions. Fused multiply-add instructions
+multiply the values in {\em rs1} and {\em rs2}, optionally negate the
+product, then add or subtract the value in {\em rs3}, writing the final
+result to {\em rd}.
+FMADD.S computes {\em rs1$\times$rs2+rs3}; FMSUB.S computes
+{\em rs1$\times$rs2-rs3}; FNMSUB.S computes {\em
+ -rs1$\times$rs2+rs3}; and FNMADD.S computes {\em
+ -rs1$\times$rs2-rs3}.
+
+\vspace{-0.2in}
+\begin{center}
+\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O}
+\\
+\instbitrange{31}{27} &
+\instbitrange{26}{25} &
+\instbitrange{24}{20} &
+\instbitrange{19}{15} &
+\instbitrange{14}{12} &
+\instbitrange{11}{7} &
+\instbitrange{6}{0} \\
+\hline
+\multicolumn{1}{|c|}{rs3} &
+\multicolumn{1}{c|}{fmt} &
+\multicolumn{1}{c|}{rs2} &
+\multicolumn{1}{c|}{rs1} &
+\multicolumn{1}{c|}{rm} &
+\multicolumn{1}{c|}{rd} &
+\multicolumn{1}{c|}{opcode} \\
+\hline
+5 & 2 & 5 & 5 & 3 & 5 & 7 \\
+src3 & S & src2 & src1 & RM & dest & F[N]MADD/F[N]MSUB \\
+\end{tabular}
+\end{center}
+
+\section{Single-Precision Floating-Point Conversion and Move \mbox{Instructions}}
+
+Floating-point-to-integer and integer-to-floating-point conversion
+instructions are encoded in the OP-FP major opcode space.
+FCVT.W.S or FCVT.L.S converts a floating-point number
+in floating-point register {\em rs1} to a signed 32-bit or 64-bit
+integer, respectively, in integer register {\em rd}. FCVT.S.W
+or FCVT.S.L converts a 32-bit or 64-bit signed integer,
+respectively, in integer register {\em rs1} into a floating-point
+number in floating-point register {\em rd}. FCVT.WU.S,
+FCVT.LU.S, FCVT.S.WU, and FCVT.S.LU variants
+convert to or from unsigned integer values. FCVT.L[U].S and
+FCVT.S.L[U] are illegal in RV32.
+If the rounded result is not representable in the destination format,
+it is clipped to the nearest value and the invalid flag is set.
+Table~\ref{tab:int_conv} gives the range of valid inputs for FCVT.{\em int}.S
+and the behavior for invalid inputs.
+
+\begin{table}[htp]
+\begin{small}
+\begin{center}
+\begin{tabular}{|l|r|r|r|r|}
+\hline
+ & FCVT.W.S & FCVT.WU.S & FCVT.L.S & FCVT.LU.S \\
+\hline
+Minimum valid input (after rounding) & $-2^{31}$ & 0 & $-2^{63}$ & 0 \\
+Maximum valid input (after rounding) & $2^{31}-1$ & $2^{32}-1$ & $2^{63}-1$ & $2^{64}-1$ \\
+\hline
+Output for out-of-range negative input & $-2^{31}$ & 0 & $-2^{63}$ & 0 \\
+Output for $-\infty$ & $-2^{31}$ & 0 & $-2^{63}$ & 0 \\
+Output for out-of-range positive input & $2^{31}-1$ & $2^{32}-1$ & $2^{63}-1$ & $2^{64}-1$ \\
+Output for $+\infty$ or NaN & $2^{31}-1$ & $2^{32}-1$ & $2^{63}-1$ & $2^{64}-1$ \\
+\hline
+\end{tabular}
+\end{center}
+\end{small}
+\caption{Domains of float-to-integer conversions and behavior for invalid inputs.}
+\label{tab:int_conv}
+\end{table}
+
+All floating-point to integer and integer to floating-point conversion
+instructions round according to the {\em rm} field. A floating-point register
+can be initialized to floating-point positive zero using FCVT.S.W {\em rd},
+{\tt x0}, which will never raise any exceptions.
+
+\vspace{-0.2in}
+\begin{center}
+\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O}
+\\
+\instbitrange{31}{27} &
+\instbitrange{26}{25} &
+\instbitrange{24}{20} &
+\instbitrange{19}{15} &
+\instbitrange{14}{12} &
+\instbitrange{11}{7} &
+\instbitrange{6}{0} \\
+\hline
+\multicolumn{1}{|c|}{funct5} &
+\multicolumn{1}{c|}{fmt} &
+\multicolumn{1}{c|}{rs2} &
+\multicolumn{1}{c|}{rs1} &
+\multicolumn{1}{c|}{rm} &
+\multicolumn{1}{c|}{rd} &
+\multicolumn{1}{c|}{opcode} \\
+\hline
+5 & 2 & 5 & 5 & 3 & 5 & 7 \\
+FCVT.{\em int}.{\em fmt} & S & W[U]/L[U] & src & RM & dest & OP-FP \\
+FCVT.{\em fmt}.{\em int} & S & W[U]/L[U] & src & RM & dest & OP-FP \\
+\end{tabular}
+\end{center}
+
+Floating-point to floating-point sign-injection instructions, FSGNJ.S,
+FSGNJN.S, and FSGNJX.S, produce a result that takes all bits except
+the sign bit from {\em rs1}. For FSGNJ, the result's sign bit is {\em
+ rs2}'s sign bit; for FSGNJN, the result's sign bit is the opposite
+of {\em rs2}'s sign bit; and for FSGNJX, the sign bit is the XOR of
+the sign bits of {\em rs1} and {\em rs2}. Sign-injection instructions
+do not set floating-point exception flags. Note, FSGNJ.S {\em rx, ry,
+ ry} moves {\em ry} to {\em rx} (assembler pseudo-op FMV.S {\em rx,
+ ry}); FSGNJN.S {\em rx, ry, ry} moves the negation of {\em ry} to
+{\em rx} (assembler pseudo-op FNEG.S {\em rx, ry}); and FSGNJX.S {\em rx,
+ ry, ry} moves the absolute value of {\em ry} to {\em rx} (assembler
+pseudo-op FABS.S {\em rx, ry}).
+
+\vspace{-0.2in}
+\begin{center}
+\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O}
+\\
+\instbitrange{31}{27} &
+\instbitrange{26}{25} &
+\instbitrange{24}{20} &
+\instbitrange{19}{15} &
+\instbitrange{14}{12} &
+\instbitrange{11}{7} &
+\instbitrange{6}{0} \\
+\hline
+\multicolumn{1}{|c|}{funct5} &
+\multicolumn{1}{c|}{fmt} &
+\multicolumn{1}{c|}{rs2} &
+\multicolumn{1}{c|}{rs1} &
+\multicolumn{1}{c|}{rm} &
+\multicolumn{1}{c|}{rd} &
+\multicolumn{1}{c|}{opcode} \\
+\hline
+5 & 2 & 5 & 5 & 3 & 5 & 7 \\
+FSGNJ & S & src2 & src1 & J[N]/JX & dest & OP-FP \\
+\end{tabular}
+\end{center}
+
+\begin{commentary}
+The sign-injection instructions
+provide floating-point MV, ABS, and NEG,
+as well as supporting a few other operations, including the IEEE copySign
+operation and sign manipulation in transcendental math function
+libraries. Although MV, ABS, and NEG only need a single register
+operand, whereas FSGNJ instructions need two, it is unlikely most
+microarchitectures would add optimizations to benefit from the reduced
+number of register reads for these relatively infrequent instructions.
+Even in this case, a microarchitecture can simply detect when both
+source registers are the same for FSGNJ instructions and only read a
+single copy.
+\end{commentary}
+
+Instructions are provided to move bit patterns between the
+floating-point and integer registers. FMV.X.S moves the
+single-precision value in floating-point register {\em rs1}
+represented in IEEE 754-2008 encoding to the lower 32 bits of integer
+register {\em rd}. For RV64, the higher 32 bits of the destination
+register are filled with copies of the floating-point number's sign
+bit. FMV.S.X moves the single-precision value encoded in IEEE
+754-2008 standard encoding from the lower 32 bits of integer register
+{\em rs1} to the floating-point register {\em rd}. The bits are not
+modified in the transfer, and in particular, the payloads of
+non-canonical NaNs are preserved.
+
+\vspace{-0.2in}
+\begin{center}
+\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O}
+\\
+\instbitrange{31}{27} &
+\instbitrange{26}{25} &
+\instbitrange{24}{20} &
+\instbitrange{19}{15} &
+\instbitrange{14}{12} &
+\instbitrange{11}{7} &
+\instbitrange{6}{0} \\
+\hline
+\multicolumn{1}{|c|}{funct5} &
+\multicolumn{1}{c|}{fmt} &
+\multicolumn{1}{c|}{rs2} &
+\multicolumn{1}{c|}{rs1} &
+\multicolumn{1}{c|}{rm} &
+\multicolumn{1}{c|}{rd} &
+\multicolumn{1}{c|}{opcode} \\
+\hline
+5 & 2 & 5 & 5 & 3 & 5 & 7 \\
+FMV.X.{\em fmt} & S & 0 & src & 000 & dest & OP-FP \\
+FMV.{\em fmt}.X & S & 0 & src & 000 & dest & OP-FP \\
+\end{tabular}
+\end{center}
+
+\begin{commentary}
+The base floating-point ISA was defined so as to allow implementations
+to employ an internal recoding of the floating-point format in
+registers to simplify handling of subnormal values and possibly to
+reduce functional unit latency. To this end, the base ISA avoids
+representing integer values in the floating-point registers by
+defining conversion and comparison operations that read and write the
+integer register file directly. This also removes many of the common
+cases where explicit moves between integer and floating-point
+registers are required, reducing instruction count and critical paths
+for common mixed-format code sequences.
+\end{commentary}
+
+\section{Single-Precision Floating-Point Compare Instructions}
+
+Floating-point compare instructions perform the specified comparison (equal,
+less than, or less than or equal) between floating-point registers {\em rs1}
+and {\em rs2} and record the Boolean result in integer register {\em rd}.
+
+FLT.S and FLE.S perform what the IEEE 754-2008 standard refers to as {\em
+signaling} comparisons: that is, an Invalid Operation exception is raised if
+either input is NaN. FEQ.S performs a {\em quiet} comparison: only signaling
+NaN inputs cause an Invalid Operation exception. For all three instructions,
+the result is 0 if either operand is NaN.
+
+\vspace{-0.2in}
+\begin{center}
+\begin{tabular}{S@{}F@{}R@{}R@{}F@{}R@{}O}
+\\
+\instbitrange{31}{27} &
+\instbitrange{26}{25} &
+\instbitrange{24}{20} &
+\instbitrange{19}{15} &
+\instbitrange{14}{12} &
+\instbitrange{11}{7} &
+\instbitrange{6}{0} \\
+\hline
+\multicolumn{1}{|c|}{funct5} &
+\multicolumn{1}{c|}{fmt} &
+\multicolumn{1}{c|}{rs2} &
+\multicolumn{1}{c|}{rs1} &
+\multicolumn{1}{c|}{rm} &
+\multicolumn{1}{c|}{rd} &
+\multicolumn{1}{c|}{opcode} \\
+\hline
+5 & 2 & 5 & 5 & 3 & 5 & 7 \\
+FCMP & S & src2 & src1 & EQ/LT/LE & dest & OP-FP \\
+\end{tabular}
+\end{center}
+
+\section{Single-Precision Floating-Point Classify Instruction}
+
+The FCLASS.S instruction examines the value in floating-point register {\em
+rs1} and writes to integer register {\em rd} a 10-bit mask that indicates
+the class of the floating-point number. The format of the mask is
+described in Table~\ref{tab:fclass}. The corresponding bit in {\em rd} will
+be set if the property is true and clear otherwise. All other bits in
+{\em rd} are cleared. Note that exactly one bit in {\em rd} will be set.
+
+\vspace{-0.2in}
+\begin{center}
+\begin{tabular}{S@{}F@{}R@{}R@{}F@{}R@{}O}
+\\
+\instbitrange{31}{27} &
+\instbitrange{26}{25} &
+\instbitrange{24}{20} &
+\instbitrange{19}{15} &
+\instbitrange{14}{12} &
+\instbitrange{11}{7} &
+\instbitrange{6}{0} \\
+\hline
+\multicolumn{1}{|c|}{funct5} &
+\multicolumn{1}{c|}{fmt} &
+\multicolumn{1}{c|}{rs2} &
+\multicolumn{1}{c|}{rs1} &
+\multicolumn{1}{c|}{rm} &
+\multicolumn{1}{c|}{rd} &
+\multicolumn{1}{c|}{opcode} \\
+\hline
+5 & 2 & 5 & 5 & 3 & 5 & 7 \\
+FCLASS & S & 0 & src & 001 & dest & OP-FP \\
+\end{tabular}
+\end{center}
+
+\begin{table}[htp]
+\begin{small}
+\begin{center}
+\begin{tabular}{|c|l|}
+\hline
+{\em rd} bit &
+Meaning \\
+\hline
+0 & {\em rs1} is $-\infty$. \\
+1 & {\em rs1} is a negative normal number. \\
+2 & {\em rs1} is a negative subnormal number. \\
+3 & {\em rs1} is $-0$. \\
+4 & {\em rs1} is $+0$. \\
+5 & {\em rs1} is a positive subnormal number. \\
+6 & {\em rs1} is a positive normal number. \\
+7 & {\em rs1} is $+\infty$. \\
+8 & {\em rs1} is a signaling NaN. \\
+9 & {\em rs1} is a quiet NaN. \\
+\hline
+\end{tabular}
+\end{center}
+\end{small}
+\caption{Format of result of FCLASS instruction.}
+\label{tab:fclass}
+\end{table}