diff options
Diffstat (limited to 'src/f.tex')
-rw-r--r-- | src/f.tex | 742 |
1 files changed, 742 insertions, 0 deletions
diff --git a/src/f.tex b/src/f.tex new file mode 100644 index 0000000..94c0920 --- /dev/null +++ b/src/f.tex @@ -0,0 +1,742 @@ +\chapter{``F'' Standard Extension for Single-Precision Floating-Point, +Version 2.0} +\label{sec:single-float} + +This chapter describes the standard instruction-set extension for +single-precision floating-point, which is named ``F'' and adds +single-precision floating-point computational instructions compliant +with the IEEE 754-2008 arithmetic standard~\cite{ieee754-2008}. + + +\section{F Register State} + +The F extension adds 32 floating-point registers, {\tt f0}--{\tt f31}, +each 32 bits wide, and a floating-point control and status register +{\tt fcsr}, which contains the operating mode and exception status of the +floating-point unit. This additional state is shown in +Figure~\ref{fprs}. We use the term FLEN to describe the width of the +floating-point registers in the RISC-V ISA, and FLEN=32 for the F +single-precision floating-point extension. Most floating-point +instructions operate on values in the floating-point register file. +Floating-point load and store instructions transfer floating-point +values between registers and memory. Instructions to transfer values +to and from the integer register file are also provided. 
+ +\begin{figure}[htbp] +{\footnotesize +\begin{center} +\begin{tabular}{p{2in}} +\instbitrange{FLEN-1}{0} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f0\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f1\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f2\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f3\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f4\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f5\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f6\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f7\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f8\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ \ f9\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f10\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f11\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f12\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f13\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f14\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f15\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f16\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f17\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f18\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f19\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f20\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f21\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f22\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f23\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f24\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f25\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f26\ \ \ \ \ }} \\ \cline{1-1} 
+\multicolumn{1}{|c|}{\reglabel{\ \ \ f27\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f28\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f29\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f30\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{\ \ \ f31\ \ \ \ \ }} \\ \cline{1-1} +\multicolumn{1}{c}{FLEN} \\ + +\instbitrange{31}{0} \\ \cline{1-1} +\multicolumn{1}{|c|}{\reglabel{fcsr}} \\ \cline{1-1} +\multicolumn{1}{c}{32} \\ +\end{tabular} +\end{center} +} +\caption{RISC-V standard F extension single-precision floating-point state.} +\label{fprs} +\end{figure} + +\begin{commentary} +We considered a unified register file for both integer and +floating-point values as this simplifies software register allocation +and calling conventions, and reduces total user state. However, a +split organization increases the total number of registers accessible +with a given instruction width, simplifies provision of enough regfile +ports for wide superscalar issue, supports decoupled +floating-point-unit architectures, and simplifies use of internal +floating-point encoding techniques. Compiler support and calling +conventions for split register file architectures are well understood, +and using dirty bits on floating-point register file state can reduce +context-switch overhead. +\end{commentary} + +\clearpage + +\section{Floating-Point Control and Status Register} + +The floating-point control and status register, {\tt fcsr}, is a RISC-V +control and status register (CSR). It is a 32-bit read/write register that +selects the dynamic rounding mode for floating-point arithmetic operations and +holds the accrued exception flags, as shown in Figure~\ref{fcsr}. 
+ +\begin{figure*} +{\footnotesize +\begin{center} +\begin{tabular}{K@{}E@{}ccccc} +\instbitrange{31}{8} & +\instbitrange{7}{5} & +\instbit{4} & +\instbit{3} & +\instbit{2} & +\instbit{1} & +\instbit{0} \\ +\hline +\multicolumn{1}{|c|}{0} & +\multicolumn{1}{c|}{Rounding Mode ({\tt frm})} & +\multicolumn{5}{c|}{Accrued Exceptions ({\tt fflags})} \\ +\hline +\multicolumn{1}{c}{} & +\multicolumn{1}{c|}{} & +\multicolumn{1}{c|}{NV} & +\multicolumn{1}{c|}{DZ} & +\multicolumn{1}{c|}{OF} & +\multicolumn{1}{c|}{UF} & +\multicolumn{1}{c|}{NX} \\ +\cline{3-7} +24 & 3 & 1 & 1 & 1 & 1 & 1 \\ +\end{tabular} +\end{center} +} +\vspace{-0.1in} +\caption{Floating-point control and status register.} +\label{fcsr} +\end{figure*} + +The {\tt fcsr} register can be read and written with the FRCSR and +FSCSR instructions, which are assembler pseudo-ops built on the +underlying CSR access instructions. FRCSR reads {\tt fcsr} by copying +it into integer register {\em rd}. FSCSR swaps the value in {\tt + fcsr} by copying the original value into integer register {\em rd}, +and then writing a new value obtained from integer register {\em rs1} +into {\tt fcsr}. + +The fields within the {\tt fcsr} can also be accessed individually +through different CSR addresses, and separate assembler pseudo-ops are +defined for these accesses. The FRRM instruction reads the Rounding +Mode field {\tt frm} and copies it into the least-significant three +bits of integer register {\em rd}, with zero in all other bits. FSRM +swaps the value in {\tt frm} by copying the original value into +integer register {\em rd}, and then writing a new value obtained from +the three least-significant bits of integer register {\em rs1} into +{\tt frm}. FRFLAGS and FSFLAGS are defined analogously for the +Accrued Exception Flags field {\tt fflags}. Additional +pseudo-instructions FSRMI and FSFLAGSI swap values using an immediate +value instead of register {\em rs1}. 
+ +Floating-point operations use either a static rounding mode encoded in the +instruction, or a dynamic rounding mode held in {\tt frm}. Rounding modes are +encoded as shown in Table~\ref{rm}. A value of 111 in the instruction's {\em +rm} field selects the dynamic rounding mode held in {\tt frm}. If {\tt frm} +is set to an invalid value (101--111), any subsequent attempt to execute +a floating-point operation with a dynamic rounding mode will cause an illegal +instruction trap. Some instructions that have the {\em rm} field are +nevertheless unaffected by the rounding mode; they should have their {\em rm} +field set to RNE (000). + +\begin{commentary} +The C99 language standard effectively mandates the provision of a +dynamic rounding mode register. +\end{commentary} +\newpage + +\begin{table}[htp] +\begin{small} +\begin{center} +\begin{tabular}{ccl} +\hline +\multicolumn{1}{|c|}{Rounding Mode} & +\multicolumn{1}{c|}{Mnemonic} & +\multicolumn{1}{c|}{Meaning} \\ +\hline +\multicolumn{1}{|c|}{000} & +\multicolumn{1}{l|}{RNE} & +\multicolumn{1}{l|}{Round to Nearest, ties to Even}\\ +\hline +\multicolumn{1}{|c|}{001} & +\multicolumn{1}{l|}{RTZ} & +\multicolumn{1}{l|}{Round towards Zero}\\ +\hline +\multicolumn{1}{|c|}{010} & +\multicolumn{1}{l|}{RDN} & +\multicolumn{1}{l|}{Round Down (towards $-\infty$)}\\ +\hline +\multicolumn{1}{|c|}{011} & +\multicolumn{1}{l|}{RUP} & +\multicolumn{1}{l|}{Round Up (towards $+\infty$)}\\ +\hline +\multicolumn{1}{|c|}{100} & +\multicolumn{1}{l|}{RMM} & +\multicolumn{1}{l|}{Round to Nearest, ties to Max Magnitude}\\ +\hline +\multicolumn{1}{|c|}{101} & +\multicolumn{1}{l|}{} & +\multicolumn{1}{l|}{\em Invalid. Reserved for future use.}\\ +\hline +\multicolumn{1}{|c|}{110} & +\multicolumn{1}{l|}{} & +\multicolumn{1}{l|}{\em Invalid. 
Reserved for future use.}\\ +\hline +\multicolumn{1}{|c|}{111} & +\multicolumn{1}{l|}{} & +\multicolumn{1}{l|}{In instruction's {\em rm} field, selects dynamic rounding mode;}\\ +\multicolumn{1}{|c|}{} & +\multicolumn{1}{l|}{} & +\multicolumn{1}{l|}{In Rounding Mode register, {\em Invalid}.}\\ +\hline +\end{tabular} +\end{center} +\end{small} +\caption{Rounding mode encoding.} +\label{rm} +\end{table} + +The accrued exception flags indicate the exception conditions that +have arisen on any floating-point arithmetic instruction since the +field was last reset by software, as shown in Table~\ref{bitdef}. + +\begin{table}[htp] +\begin{small} +\begin{center} +\begin{tabular}{cl} +\hline +\multicolumn{1}{|c|}{Flag Mnemonic} & +\multicolumn{1}{c|}{Flag Meaning} \\ +\hline +\multicolumn{1}{|c|}{NV} & +\multicolumn{1}{c|}{Invalid Operation}\\ +\hline +\multicolumn{1}{|c|}{DZ} & +\multicolumn{1}{c|}{Divide by Zero}\\ +\hline +\multicolumn{1}{|c|}{OF} & +\multicolumn{1}{c|}{Overflow}\\ +\hline +\multicolumn{1}{|c|}{UF} & +\multicolumn{1}{c|}{Underflow}\\ +\hline +\multicolumn{1}{|c|}{NX} & +\multicolumn{1}{c|}{Inexact}\\ +\hline +\end{tabular} +\end{center} +\end{small} +\caption{Accrued exception flag encoding.} +\label{bitdef} +\end{table} + +\begin{commentary} +As allowed by the standard, we do not support traps on floating-point +exceptions in the base ISA, but instead require explicit checks of the flags +in software. We considered adding branches controlled directly by the +contents of the floating-point accrued exception flags, but ultimately chose +to omit these instructions to keep the ISA simple. +\end{commentary} + +\section{NaN Generation and Propagation} + +Except when otherwise stated, if the result of a floating-point operation is +NaN, it is the canonical NaN. The canonical NaN has a positive sign and all +significand bits clear except the MSB, a.k.a. the quiet bit. For +single-precision floating-point, this corresponds to the pattern {\tt +0x7fc00000}. 
+ +For FMIN and FMAX, if at least one input is a signaling NaN, or if both inputs +are quiet NaNs, the result is the canonical NaN. If one operand is a quiet NaN +and the other is not a NaN, the result is the non-NaN operand. + +The sign-injection instructions (FSGNJ, FSGNJN, FSGNJX) do not canonicalize +NaNs; they manipulate the underlying bit patterns directly. + +\begin{commentary} +We considered propagating NaN payloads, as is recommended by the standard, +but this decision would have increased hardware cost. Moreover, since this +feature is optional in the standard, it cannot be used in portable code. + +Implementors are free to provide a NaN payload propagation scheme as +a nonstandard extension enabled by a nonstandard operating mode. However, the +canonical NaN scheme described above must always be supported and should be +the default mode. +\end{commentary} + +\begin{commentary} +We require implementations to return the standard-mandated default +values in the case of exceptional conditions, without any further +intervention on the part of user-level software (unlike the Alpha ISA +floating-point trap barriers). We believe full hardware handling of +exceptional cases will become more common, and so wish to avoid +complicating the user-level ISA to optimize other approaches. +Implementations can always trap to machine-mode software handlers to +provide exceptional default values. +\end{commentary} + +\section{Subnormal Arithmetic} + +Operations on subnormal numbers are handled in accordance with the IEEE +754-2008 standard. + +In the parlance of the IEEE standard, tininess is detected after +rounding---that is, the underflow exception is raised only if the rounded +result is subnormal; it is not raised when rounding yields a normal result, +even if the unrounded result would have been subnormal. + +\begin{commentary} +Detecting tininess after rounding results in fewer spurious underflow signals. 
+\end{commentary} + +\section{Single-Precision Load and Store Instructions} + +Floating-point loads and stores use the same base+offset addressing +mode as the integer base ISA, with a base address in register {\em + rs1} and a 12-bit signed byte offset. The FLW instruction loads a +single-precision floating-point value from memory into floating-point +register {\em rd}. FSW stores a single-precision value from +floating-point register {\em rs2} to memory. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{M@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:0]} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{width} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +12 & 5 & 3 & 5 & 7 \\ +offset[11:0] & base & W & dest & LOAD-FP \\ +\end{tabular} +\end{center} + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{O@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{imm[11:5]} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{width} & +\multicolumn{1}{c|}{imm[4:0]} & +\multicolumn{1}{c|}{opcode} \\ +\hline +7 & 5 & 5 & 3 & 5 & 7 \\ +offset[11:5] & src & base & W & offset[4:0] & STORE-FP \\ +\end{tabular} +\end{center} + +FLW and FSW are only guaranteed to execute atomically if the effective address +is naturally aligned. + +\section{Single-Precision Floating-Point Computational Instructions} +\label{sec:single-float-compute} + +Floating-point arithmetic instructions with one or two source operands use the +R-type format with the OP-FP major opcode. 
FADD.S, FSUB.S, +FMUL.S, and FDIV.S perform single-precision floating-point addition, +subtraction, multiplication, and division, respectively, between {\em rs1} and +{\em rs2}, writing the result to {\em rd}. FMIN.S and FMAX.S +write, respectively, the smaller or larger of {\em rs1} and {\em rs2} to {\em +rd}. FSQRT.S computes the square root of {\em rs1} and writes the +result to {\em rd}. + +The 2-bit floating-point format field {\em fmt} is encoded as shown in +Table~\ref{tab:fmt}. It is set to {\em S} (00) for all instructions in +the F extension. + +\begin{table}[htp] +\begin{small} +\begin{center} +\begin{tabular}{|c|c|l|} +\hline +{\em fmt} field & +Mnemonic & +Meaning \\ +\hline +00 & S & 32-bit single-precision \\ +01 & D & 64-bit double-precision \\ +10 & - & {\em reserved} \\ +11 & Q & 128-bit quad-precision \\ +\hline +\end{tabular} +\end{center} +\end{small} +\caption{Format field encoding.} +\label{tab:fmt} +\end{table} + +All floating-point operations that perform rounding can select the +rounding mode using the {\em rm} field with the encoding shown in +Table~\ref{rm}. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FADD/FSUB & S & src2 & src1 & RM & dest & OP-FP \\ +FMUL/FDIV & S & src2 & src1 & RM & dest & OP-FP \\ +FMIN-MAX & S & src2 & src1 & MIN/MAX & dest & OP-FP \\ +FSQRT & S & 0 & src & RM & dest & OP-FP \\ +\end{tabular} +\end{center} + +Floating-point fused multiply-add instructions require a new standard +instruction format. 
R4-type instructions specify three source +registers ({\em rs1}, {\em rs2}, and {\em rs3}) and a destination +register ({\em rd}). This format is only used by the floating-point +fused multiply-add instructions. Fused multiply-add instructions +multiply the values in {\em rs1} and {\em rs2}, optionally negate the +product, then add or subtract the value in {\em rs3}, writing the final +result to {\em rd}. +FMADD.S computes {\em rs1$\times$rs2+rs3}; FMSUB.S computes +{\em rs1$\times$rs2-rs3}; FNMSUB.S computes {\em + -rs1$\times$rs2+rs3}; and FNMADD.S computes {\em + -rs1$\times$rs2-rs3}. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{rs3} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +src3 & S & src2 & src1 & RM & dest & F[N]MADD/F[N]MSUB \\ +\end{tabular} +\end{center} + +\section{Single-Precision Floating-Point Conversion and Move \mbox{Instructions}} + +Floating-point-to-integer and integer-to-floating-point conversion +instructions are encoded in the OP-FP major opcode space. +FCVT.W.S or FCVT.L.S converts a floating-point number +in floating-point register {\em rs1} to a signed 32-bit or 64-bit +integer, respectively, in integer register {\em rd}. FCVT.S.W +or FCVT.S.L converts a 32-bit or 64-bit signed integer, +respectively, in integer register {\em rs1} into a floating-point +number in floating-point register {\em rd}. FCVT.WU.S, +FCVT.LU.S, FCVT.S.WU, and FCVT.S.LU variants +convert to or from unsigned integer values. FCVT.L[U].S and +FCVT.S.L[U] are illegal in RV32. 
+If the rounded result is not representable in the destination format, +it is clipped to the nearest value and the invalid flag is set. +Table~\ref{tab:int_conv} gives the range of valid inputs for FCVT.{\em int}.S +and the behavior for invalid inputs. + +\begin{table}[htp] +\begin{small} +\begin{center} +\begin{tabular}{|l|r|r|r|r|} +\hline + & FCVT.W.S & FCVT.WU.S & FCVT.L.S & FCVT.LU.S \\ +\hline +Minimum valid input (after rounding) & $-2^{31}$ & 0 & $-2^{63}$ & 0 \\ +Maximum valid input (after rounding) & $2^{31}-1$ & $2^{32}-1$ & $2^{63}-1$ & $2^{64}-1$ \\ +\hline +Output for out-of-range negative input & $-2^{31}$ & 0 & $-2^{63}$ & 0 \\ +Output for $-\infty$ & $-2^{31}$ & 0 & $-2^{63}$ & 0 \\ +Output for out-of-range positive input & $2^{31}-1$ & $2^{32}-1$ & $2^{63}-1$ & $2^{64}-1$ \\ +Output for $+\infty$ or NaN & $2^{31}-1$ & $2^{32}-1$ & $2^{63}-1$ & $2^{64}-1$ \\ +\hline +\end{tabular} +\end{center} +\end{small} +\caption{Domains of float-to-integer conversions and behavior for invalid inputs.} +\label{tab:int_conv} +\end{table} + +All floating-point to integer and integer to floating-point conversion +instructions round according to the {\em rm} field. A floating-point register +can be initialized to floating-point positive zero using FCVT.S.W {\em rd}, +{\tt x0}, which will never raise any exceptions. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCVT.{\em int}.{\em fmt} & S & W[U]/L[U] & src & RM & dest & OP-FP \\ +FCVT.{\em fmt}.{\em int} & S & W[U]/L[U] & src & RM & dest & OP-FP \\ +\end{tabular} +\end{center} + +Floating-point to floating-point sign-injection instructions, FSGNJ.S, +FSGNJN.S, and FSGNJX.S, produce a result that takes all bits except +the sign bit from {\em rs1}. For FSGNJ, the result's sign bit is {\em + rs2}'s sign bit; for FSGNJN, the result's sign bit is the opposite +of {\em rs2}'s sign bit; and for FSGNJX, the sign bit is the XOR of +the sign bits of {\em rs1} and {\em rs2}. Sign-injection instructions +do not set floating-point exception flags. Note, FSGNJ.S {\em rx, ry, + ry} moves {\em ry} to {\em rx} (assembler pseudo-op FMV.S {\em rx, + ry}); FSGNJN.S {\em rx, ry, ry} moves the negation of {\em ry} to +{\em rx} (assembler pseudo-op FNEG.S {\em rx, ry}); and FSGNJX.S {\em rx, + ry, ry} moves the absolute value of {\em ry} to {\em rx} (assembler +pseudo-op FABS.S {\em rx, ry}). 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FSGNJ & S & src2 & src1 & J[N]/JX & dest & OP-FP \\ +\end{tabular} +\end{center} + +\begin{commentary} +The sign-injection instructions +provide floating-point MV, ABS, and NEG, +as well as supporting a few other operations, including the IEEE copySign +operation and sign manipulation in transcendental math function +libraries. Although MV, ABS, and NEG only need a single register +operand, whereas FSGNJ instructions need two, it is unlikely most +microarchitectures would add optimizations to benefit from the reduced +number of register reads for these relatively infrequent instructions. +Even in this case, a microarchitecture can simply detect when both +source registers are the same for FSGNJ instructions and only read a +single copy. +\end{commentary} + +Instructions are provided to move bit patterns between the +floating-point and integer registers. FMV.X.S moves the +single-precision value in floating-point register {\em rs1} +represented in IEEE 754-2008 encoding to the lower 32 bits of integer +register {\em rd}. For RV64, the higher 32 bits of the destination +register are filled with copies of the floating-point number's sign +bit. FMV.S.X moves the single-precision value encoded in IEEE +754-2008 standard encoding from the lower 32 bits of integer register +{\em rs1} to the floating-point register {\em rd}. The bits are not +modified in the transfer, and in particular, the payloads of +non-canonical NaNs are preserved. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{R@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FMV.X.{\em fmt} & S & 0 & src & 000 & dest & OP-FP \\ +FMV.{\em fmt}.X & S & 0 & src & 000 & dest & OP-FP \\ +\end{tabular} +\end{center} + +\begin{commentary} +The base floating-point ISA was defined so as to allow implementations +to employ an internal recoding of the floating-point format in +registers to simplify handling of subnormal values and possibly to +reduce functional unit latency. To this end, the base ISA avoids +representing integer values in the floating-point registers by +defining conversion and comparison operations that read and write the +integer register file directly. This also removes many of the common +cases where explicit moves between integer and floating-point +registers are required, reducing instruction count and critical paths +for common mixed-format code sequences. +\end{commentary} + +\section{Single-Precision Floating-Point Compare Instructions} + +Floating-point compare instructions perform the specified comparison (equal, +less than, or less than or equal) between floating-point registers {\em rs1} +and {\em rs2} and record the Boolean result in integer register {\em rd}. + +FLT.S and FLE.S perform what the IEEE 754-2008 standard refers to as {\em +signaling} comparisons: that is, an Invalid Operation exception is raised if +either input is NaN. FEQ.S performs a {\em quiet} comparison: only signaling +NaN inputs cause an Invalid Operation exception. For all three instructions, +the result is 0 if either operand is NaN. 
+ +\vspace{-0.2in} +\begin{center} +\begin{tabular}{S@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCMP & S & src2 & src1 & EQ/LT/LE & dest & OP-FP \\ +\end{tabular} +\end{center} + +\section{Single-Precision Floating-Point Classify Instruction} + +The FCLASS.S instruction examines the value in floating-point register {\em +rs1} and writes to integer register {\em rd} a 10-bit mask that indicates +the class of the floating-point number. The format of the mask is +described in Table~\ref{tab:fclass}. The corresponding bit in {\em rd} will +be set if the property is true and clear otherwise. All other bits in +{\em rd} are cleared. Note that exactly one bit in {\em rd} will be set. + +\vspace{-0.2in} +\begin{center} +\begin{tabular}{S@{}F@{}R@{}R@{}F@{}R@{}O} +\\ +\instbitrange{31}{27} & +\instbitrange{26}{25} & +\instbitrange{24}{20} & +\instbitrange{19}{15} & +\instbitrange{14}{12} & +\instbitrange{11}{7} & +\instbitrange{6}{0} \\ +\hline +\multicolumn{1}{|c|}{funct5} & +\multicolumn{1}{c|}{fmt} & +\multicolumn{1}{c|}{rs2} & +\multicolumn{1}{c|}{rs1} & +\multicolumn{1}{c|}{rm} & +\multicolumn{1}{c|}{rd} & +\multicolumn{1}{c|}{opcode} \\ +\hline +5 & 2 & 5 & 5 & 3 & 5 & 7 \\ +FCLASS & S & 0 & src & 001 & dest & OP-FP \\ +\end{tabular} +\end{center} + +\begin{table}[htp] +\begin{small} +\begin{center} +\begin{tabular}{|c|l|} +\hline +{\em rd} bit & +Meaning \\ +\hline +0 & {\em rs1} is $-\infty$. \\ +1 & {\em rs1} is a negative normal number. \\ +2 & {\em rs1} is a negative subnormal number. \\ +3 & {\em rs1} is $-0$. \\ +4 & {\em rs1} is $+0$. 
\\ +5 & {\em rs1} is a positive subnormal number. \\ +6 & {\em rs1} is a positive normal number. \\ +7 & {\em rs1} is $+\infty$. \\ +8 & {\em rs1} is a signaling NaN. \\ +9 & {\em rs1} is a quiet NaN. \\ +\hline +\end{tabular} +\end{center} +\end{small} +\caption{Format of result of FCLASS instruction.} +\label{tab:fclass} +\end{table} |