diff options
author | wmat <wmat@riscv.org> | 2024-03-13 11:54:05 -0400 |
---|---|---|
committer | wmat <wmat@riscv.org> | 2024-03-13 11:54:05 -0400 |
commit | 4ce01b02f490d0167d2abd03923a5508d70756e1 (patch) | |
tree | 48703e1c135e90a72e2bb33d9ba15af155093eff /src/scalar-crypto.adoc | |
parent | 169ef9f88a7c725f143908f0973d3d702a76210f (diff) | |
download | riscv-isa-manual-4ce01b02f490d0167d2abd03923a5508d70756e1.zip riscv-isa-manual-4ce01b02f490d0167d2abd03923a5508d70756e1.tar.gz riscv-isa-manual-4ce01b02f490d0167d2abd03923a5508d70756e1.tar.bz2 |
Adding all instructions and entropy chapter.
Adding all the instructions and the entropy chapter.
Diffstat (limited to 'src/scalar-crypto.adoc')
-rw-r--r-- | src/scalar-crypto.adoc | 2525 |
1 files changed, 2525 insertions, 0 deletions
diff --git a/src/scalar-crypto.adoc b/src/scalar-crypto.adoc index d496749..486359c 100644 --- a/src/scalar-crypto.adoc +++ b/src/scalar-crypto.adoc @@ -581,6 +581,8 @@ Included in:: | Frozen |=== +<<< + [#insns-aes32dsmi, reftext="AES middle round decrypt (RV32)"] ==== aes32dsmi @@ -642,6 +644,8 @@ Included in:: | Frozen |=== +<<< + [#insns-aes32esi, reftext="AES final round encrypt (RV32)"] ==== aes32esi @@ -702,6 +706,8 @@ Included in:: | Frozen |=== +<<< + [#insns-aes32esmi, reftext="AES middle round encrypt (RV32)"] ==== aes32esmi @@ -763,6 +769,8 @@ Included in:: | Frozen |=== +<<< + [#insns-aes64ds, reftext="AES decrypt final round (RV64)"] ==== aes64ds @@ -835,6 +843,8 @@ Included in:: | Frozen |=== +<<< + [#insns-aes64dsm, reftext="AES decrypt middle round (RV64)"] ==== aes64dsm @@ -908,6 +918,8 @@ Included in:: | Frozen |=== +<<< + [#insns-aes64es, reftext="AES encrypt final round instruction (RV64)"] ==== aes64es @@ -980,6 +992,8 @@ Included in:: | Frozen |=== +<<< + [#insns-aes64esm, reftext="AES encrypt middle round instruction (RV64)"] ==== aes64esm @@ -1053,6 +1067,8 @@ Included in:: | Frozen |=== +<<< + [#insns-aes64im, reftext="AES Decrypt KeySchedule MixColumns (RV64)"] ==== aes64im @@ -1116,6 +1132,8 @@ Included in:: | Frozen |=== +<<< + [#insns-aes64ks1i, reftext="AES Key Schedule Instruction 1 (RV64)"] ==== aes64ks1i @@ -1188,6 +1206,8 @@ Included in:: | Frozen |=== +<<< + [#insns-aes64ks2, reftext="AES Key Schedule Instruction 2 (RV64)"] ==== aes64ks2 @@ -1250,4 +1270,2509 @@ Included in:: | Frozen |=== +<<< + +[#insns-andn,reftext="AND with inverted operand"] +==== andn + +Synopsis:: +AND with inverted operand + +Mnemonic:: +andn _rd_, _rs1_, _rs2_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ + { bits: 7, name: 0x33, attr: ['OP'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x7, attr: ['ANDN']}, + { bits: 5, name: 'rs1' }, + { bits: 5, name: 'rs2' }, + { bits: 7, name: 0x20, attr: ['ANDN'] }, +]} +.... 
+ +Description:: +This instruction performs the bitwise logical AND operation between _rs1_ and the bitwise inversion of _rs2_. + +Operation:: +[source,sail] +-- +X(rd) = X(rs1) & ~X(rs2); +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbb (<<#zbb>>) +|1.0.0 +|Frozen + +|Zbkb (<<#zbkb>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-clmul,reftext="Carry-less multiply (low-part)"] +==== clmul + +Synopsis:: +Carry-less multiply (low-part) + +Mnemonic:: +clmul _rd_, _rs1_, _rs2_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ + { bits: 7, name: 0x33, attr: ['OP'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x1, attr: ['CLMUL'] }, + { bits: 5, name: 'rs1' }, + { bits: 5, name: 'rs2' }, + { bits: 7, name: 0x5, attr: ['MINMAX/CLMUL'] }, +]} +.... + +Description:: +clmul produces the lower half of the 2·XLEN carry-less product. + +Operation:: +[source,sail] +-- +let rs1_val = X(rs1); +let rs2_val = X(rs2); +let output : xlenbits = 0; + +foreach (i from 0 to (xlen - 1) by 1) { + output = if ((rs2_val >> i) & 1) + then output ^ (rs1_val << i); + else output; +} + +X[rd] = output +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbc (<<#zbc>>) +|1.0.0 +|Frozen + +|Zbkc (<<#zbkc>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-clmulh,reftext="Carry-less multiply (high-part)"] +==== clmulh + +Synopsis:: +Carry-less multiply (high-part) + +Mnemonic:: +clmulh _rd_, _rs1_, _rs2_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ + { bits: 7, name: 0x33, attr: ['OP'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x3, attr: ['CLMULH'] }, + { bits: 5, name: 'rs1' }, + { bits: 5, name: 'rs2' }, + { bits: 7, name: 0x5, attr: ['MINMAX/CLMUL'] }, +]} +.... + +Description:: +clmulh produces the upper half of the 2·XLEN carry-less product. 
+ +Operation:: +[source,sail] +-- +let rs1_val = X(rs1); +let rs2_val = X(rs2); +let output : xlenbits = 0; + +foreach (i from 1 to xlen by 1) { + output = if ((rs2_val >> i) & 1) + then output ^ (rs1_val >> (xlen - i)); + else output; +} + +X[rd] = output +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbc (<<#zbc>>) +|1.0.0 +|Frozen + +|Zbkc (<<#zbkc>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-orn,reftext="OR with inverted operand"] +==== orn + +Synopsis:: +OR with inverted operand + +Mnemonic:: +orn _rd_, _rs1_, _rs2_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ + { bits: 7, name: 0x33, attr: ['OP'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x6, attr: ['ORN']}, + { bits: 5, name: 'rs1' }, + { bits: 5, name: 'rs2' }, + { bits: 7, name: 0x20, attr: ['ORN'] }, +]} +.... + +Description:: +This instruction performs the bitwise logical OR operation between _rs1_ and the bitwise inversion of _rs2_. + +Operation:: +[source,sail] +-- +X(rd) = X(rs1) | ~X(rs2); +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbb (<<#zbb>>) +|v1.0.0 +|Frozen + +|Zbkb (<<#zbkb>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-pack,reftext="Pack low halves of registers"] +==== pack + +Synopsis:: +Pack the low halves of _rs1_ and _rs2_ into _rd_. + +Mnemonic:: +pack _rd_, _rs1_, _rs2_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ + {bits: 7, name: 0x33, attr: ['OP'] }, + {bits: 5, name: 'rd'}, + {bits: 3, name: 0x4, attr:['PACK']}, + {bits: 5, name: 'rs1'}, + {bits: 5, name: 'rs2'}, + {bits: 7, name: 0x4, attr:['PACK']}, +]} +.... + +Description:: +The pack instruction packs the XLEN/2-bit lower halves of _rs1_ and _rs2_ into +_rd_, with _rs1_ in the lower half and _rs2_ in the upper half. 
+ +Operation:: +[source,sail] +-- +let lo_half : bits(xlen/2) = X(rs1)[xlen/2-1..0]; +let hi_half : bits(xlen/2) = X(rs2)[xlen/2-1..0]; +X(rd) = EXTZ(hi_half @ lo_half); +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbkb (<<#zbkb>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-packh,reftext="Pack low bytes of registers"] +==== packh + +Synopsis:: +Pack the low bytes of _rs1_ and _rs2_ into _rd_. + +Mnemonic:: +packh _rd_, _rs1_, _rs2_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ + {bits: 7, name: 0x33, attr: ['OP'] }, + {bits: 5, name: 'rd'}, + {bits: 3, name: 0x7, attr: ['PACKH']}, + {bits: 5, name: 'rs1'}, + {bits: 5, name: 'rs2'}, + {bits: 7, name: 0x4, attr: ['PACKH']}, +]} +.... + +Description:: +The packh instruction packs the least-significant bytes of +_rs1_ and _rs2_ into the 16 least-significant bits of _rd_, +zero-extending the rest of _rd_. + +Operation:: +[source,sail] +-- +let lo_half : bits(8) = X(rs1)[7..0]; +let hi_half : bits(8) = X(rs2)[7..0]; +X(rd) = EXTZ(hi_half @ lo_half); +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbkb (<<#zbkb>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-packw,reftext="Pack low 16-bits of registers (RV64)"] +==== packw + +Synopsis:: +Pack the low 16-bits of _rs1_ and _rs2_ into _rd_ on RV64. + +Mnemonic:: +packw _rd_, _rs1_, _rs2_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 2, name: 0x3}, +{bits: 5, name: 0xe}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x4}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 'rs2'}, +{bits: 7, name: 0x4}, +]} +.... + +Description:: +This instruction packs the low 16 bits of +_rs1_ and _rs2_ into the 32 least-significant bits of _rd_, +sign-extending the 32-bit result to the rest of _rd_. +This instruction only exists on RV64-based systems. 
+ +Operation:: +[source,sail] +-- +let lo_half : bits(16) = X(rs1)[15..0]; +let hi_half : bits(16) = X(rs2)[15..0]; +X(rd) = EXTS(hi_half @ lo_half); +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbkb (<<#zbkb>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-rev8,reftext="Byte-reverse register"] +==== rev8 + +Synopsis:: +Byte-reverse register + +Mnemonic:: +rev8 _rd_, _rs_ + +Encoding (RV32):: +[wavedrom, , svg] +.... +{reg:[ + { bits: 7, name: 0x13, attr: ['OP-IMM'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x5 }, + { bits: 5, name: 'rs' }, + { bits: 12, name: 0x698 } +]} +.... + +Encoding (RV64):: +[wavedrom, , svg] +.... +{reg:[ + { bits: 7, name: 0x13, attr: ['OP-IMM'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x5 }, + { bits: 5, name: 'rs' }, + { bits: 12, name: 0x6b8 } +]} +.... + +Description:: +This instruction reverses the order of the bytes in _rs_. + +Operation:: +[source,sail] +-- +let input = X(rs); +let output : xlenbits = 0; +let j = xlen - 1; + +foreach (i from 0 to (xlen - 8) by 8) { + output[i..(i + 7)] = input[(j - 7)..j]; + j = j - 8; +} + +X[rd] = output +-- + +.Note +[NOTE, caption="A" ] +=============================================================== +The *rev8* mnemonic corresponds to different instruction encodings in RV32 and RV64. +=============================================================== + +.Software Hint +[NOTE, caption="SH" ] +=============================================================== +The byte-reverse operation is only available for the full register +width. To emulate word-sized and halfword-sized byte-reversal, +perform a `rev8 rd,rs` followed by a `srai rd,rd,K`, where K is +XLEN-32 and XLEN-16, respectively. 
+=============================================================== + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbb (<<#zbb>>) +|v1.0.0 +|Frozen + +|Zbkb (<<#zbkb>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-rol,reftext="Rotate left (Register)"] +==== rol + +Synopsis:: +Rotate Left (Register) + +Mnemonic:: +rol _rd_, _rs1_, _rs2_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ + { bits: 7, name: 0x33, attr: ['OP'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x1, attr: ['ROL']}, + { bits: 5, name: 'rs1' }, + { bits: 5, name: 'rs2' }, + { bits: 7, name: 0x30, attr: ['ROL'] }, +]} +.... + +Description:: +This instruction performs a rotate left of _rs1_ by the amount in least-significant log2(XLEN) bits of _rs2_. + +Operation:: +[source,sail] +-- +let shamt = if xlen == 32 + then X(rs2)[4..0] + else X(rs2)[5..0]; +let result = (X(rs1) << shamt) | (X(rs1) >> (xlen - shamt)); + +X(rd) = result; +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbb (<<#zbb>>) +|v1.0.0 +|Frozen + +|Zbkb (<<#zbkb>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-rolw,reftext="Rotate Left Word (Register)"] +==== rolw + +Synopsis:: +Rotate Left Word (Register) + +Mnemonic:: +rolw _rd_, _rs1_, _rs2_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ + { bits: 7, name: 0x3b, attr: ['OP-32'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x1, attr: ['ROLW']}, + { bits: 5, name: 'rs1' }, + { bits: 5, name: 'rs2' }, + { bits: 7, name: 0x30, attr: ['ROLW'] }, +]} +.... + +Description:: +This instruction performs a rotate left on the least-significant word of _rs1_ by the amount in least-significant 5 bits of _rs2_. +The resulting word value is sign-extended by copying bit 31 to all of the more-significant bits. 
+ +Operation:: +[source,sail] +-- +let rs1 = EXTZ(X(rs1)[31..0]); +let shamt = X(rs2)[4..0]; +let result = (rs1 << shamt) | (rs1 >> (32 - shamt)); +X(rd) = EXTS(result[31..0]); +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbb (<<#zbb>>) +|v1.0.0 +|Frozen + +|Zbkb (<<#zbkb>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-ror, reftext="Rotate right (Register)"] +==== ror + +Synopsis:: +Rotate Right + +Mnemonic:: +ror _rd_, _rs1_, _rs2_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ + { bits: 7, name: 0x33, attr: ['OP'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x5, attr: ['ROR']}, + { bits: 5, name: 'rs1' }, + { bits: 5, name: 'rs2' }, + { bits: 7, name: 0x30, attr: ['ROR'] }, +]} +.... + +Description:: +This instruction performs a rotate right of _rs1_ by the amount in the least-significant log2(XLEN) bits of _rs2_. + +Operation:: +[source,sail] +-- +let shamt = if xlen == 32 + then X(rs2)[4..0] + else X(rs2)[5..0]; +let result = (X(rs1) >> shamt) | (X(rs1) << (xlen - shamt)); + +X(rd) = result; +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbb (<<#zbb>>) +|v1.0.0 +|Frozen + +|Zbkb (<<#zbkb>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-rori,reftext="Rotate right (Immediate)"] +==== rori + +Synopsis:: +Rotate Right (Immediate) + +Mnemonic:: +rori _rd_, _rs1_, _shamt_ + +Encoding (RV32):: +[wavedrom, , svg] +.... +{reg:[ + { bits: 7, name: 0x13, attr: ['OP-IMM'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x5, attr: ['RORI']}, + { bits: 5, name: 'rs1' }, + { bits: 5, name: 'shamt' }, + { bits: 7, name: 0x30, attr: ['RORI'] }, +]} +.... + +Encoding (RV64):: +[wavedrom, , svg] +.... +{reg:[ + { bits: 7, name: 0x13, attr: ['OP-IMM'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x5, attr: ['RORI']}, + { bits: 5, name: 'rs1' }, + { bits: 6, name: 'shamt' }, + { bits: 6, name: 0x18, attr: ['RORI'] }, +]} +.... 
+ +Description:: +This instruction performs a rotate right of _rs1_ by the amount in the least-significant log2(XLEN) bits of _shamt_. +For RV32, the encodings corresponding to shamt[5]=1 are reserved. + +Operation:: +[source,sail] +-- +let shamt = if xlen == 32 + then shamt[4..0] + else shamt[5..0]; +let result = (X(rs1) >> shamt) | (X(rs1) << (xlen - shamt)); + +X(rd) = result; +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbb (<<#zbb>>) +|v1.0.0 +|Frozen + +|Zbkb (<<#zbkb>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-roriw,reftext="Rotate right Word (Immediate)"] +==== roriw + +Synopsis:: +Rotate Right Word by Immediate + +Mnemonic:: +roriw _rd_, _rs1_, _shamt_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ + { bits: 7, name: 0x1b, attr: ['OP-IMM-32'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x5, attr: ['RORIW']}, + { bits: 5, name: 'rs1' }, + { bits: 5, name: 'shamt' }, + { bits: 7, name: 0x30, attr: ['RORIW'] }, +]} +.... + +Description:: +This instruction performs a rotate right on the least-significant word +of _rs1_ by the amount in the least-significant 5 bits of +_shamt_. +The resulting word value is sign-extended by copying bit 31 to all of +the more-significant bits. + + +Operation:: +[source,sail] +-- +let rs1_data = EXTZ(X(rs1)[31..0]); +let result = (rs1_data >> shamt) | (rs1_data << (32 - shamt)); +X(rd) = EXTS(result[31..0]); +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbb (<<#zbb>>) +|v1.0.0 +|Frozen + +|Zbkb (<<#zbkb>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-rorw,reftext="Rotate right Word (Register)"] +==== rorw + +Synopsis:: +Rotate Right Word (Register) + +Mnemonic:: +rorw _rd_, _rs1_, _rs2_ + +Encoding:: +[wavedrom, , svg] +.... 
+{reg:[ + { bits: 7, name: 0x3b, attr: ['OP-32'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x5, attr: ['RORW']}, + { bits: 5, name: 'rs1' }, + { bits: 5, name: 'rs2' }, + { bits: 7, name: 0x30, attr: ['RORW'] }, +]} +.... + +Description:: +This instruction performs a rotate right on the least-significant word of _rs1_ by the amount in the least-significant 5 bits of _rs2_. +The resultant word is sign-extended by copying bit 31 to all of the more-significant bits. + +Operation:: +[source,sail] +-- +let rs1 = EXTZ(X(rs1)[31..0]); +let shamt = X(rs2)[4..0]; +let result = (rs1 >> shamt) | (rs1 << (32 - shamt)); +X(rd) = EXTS(result[31..0]); +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbb (<<#zbb>>) +|v1.0.0 +|Frozen + +|Zbkb (<<#zbkb>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-sha256sig0, reftext="SHA2-256 Sigma0 instruction"] +==== sha256sig0 + +Synopsis:: +Implements the Sigma0 transformation function as used in +the SHA2-256 hash function cite:[nist:fips:180:4] (Section 4.1.2). + +Mnemonic:: +sha256sig0 rd, rs1 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x13}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x1}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 0x2}, +{bits: 5, name: 0x8}, +{bits: 2, name: 0x0}, +]} +.... + +Description:: +This instruction is supported for both RV32 and RV64 base architectures. +For RV32, the entire `XLEN` source register is operated on. +For RV64, the low `32` bits of the source register are operated on, and the +result sign extended to `XLEN` bits. +Though named for SHA2-256, the instruction works for both the +SHA2-224 and SHA2-256 parameterisations as described in +cite:[nist:fips:180:4]. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. 
+ +Operation:: +[source,sail] +-- +function clause execute (SHA256SIG0(rs1,rd)) = { + let inb : bits(32) = X(rs1)[31..0]; + let result : bits(32) = ror32(inb, 7) ^ ror32(inb, 18) ^ (inb >> 3); + X(rd) = EXTS(result); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> +| v1.0.0 +| Frozen +| <<zkn>> +| v1.0.0 +| Frozen +| <<zk>> +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sha256sig1, reftext="SHA2-256 Sigma1 instruction"] +==== sha256sig1 + +Synopsis:: +Implements the Sigma1 transformation function as used in +the SHA2-256 hash function cite:[nist:fips:180:4] (Section 4.1.2). + +Mnemonic:: +sha256sig1 rd, rs1 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x13}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x1}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 0x3}, +{bits: 5, name: 0x8}, +{bits: 2, name: 0x0}, +]} +.... + +Description:: +This instruction is supported for both RV32 and RV64 base architectures. +For RV32, the entire `XLEN` source register is operated on. +For RV64, the low `32` bits of the source register are operated on, and the +result sign extended to `XLEN` bits. +Though named for SHA2-256, the instruction works for both the +SHA2-224 and SHA2-256 parameterisations as described in +cite:[nist:fips:180:4]. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. 
+ +Operation:: +[source,sail] +-- +function clause execute (SHA256SIG1(rs1,rd)) = { + let inb : bits(32) = X(rs1)[31..0]; + let result : bits(32) = ror32(inb, 17) ^ ror32(inb, 19) ^ (inb >> 10); + X(rd) = EXTS(result); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> +| v1.0.0 +| Frozen +| <<zkn>> +| v1.0.0 +| Frozen +| <<zk>> +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sha256sum0, reftext="SHA2-256 Sum0 instruction"] +==== sha256sum0 + +Synopsis:: +Implements the Sum0 transformation function as used in +the SHA2-256 hash function cite:[nist:fips:180:4] (Section 4.1.2). + +Mnemonic:: +sha256sum0 rd, rs1 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x13}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x1}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 0x0}, +{bits: 5, name: 0x8}, +{bits: 2, name: 0x0}, +]} +.... + +Description:: +This instruction is supported for both RV32 and RV64 base architectures. +For RV32, the entire `XLEN` source register is operated on. +For RV64, the low `32` bits of the source register are operated on, and the +result sign extended to `XLEN` bits. +Though named for SHA2-256, the instruction works for both the +SHA2-224 and SHA2-256 parameterisations as described in +cite:[nist:fips:180:4]. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. 
+ +Operation:: +[source,sail] +-- +function clause execute (SHA256SUM0(rs1,rd)) = { + let inb : bits(32) = X(rs1)[31..0]; + let result : bits(32) = ror32(inb, 2) ^ ror32(inb, 13) ^ ror32(inb, 22); + X(rd) = EXTS(result); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> +| v1.0.0 +| Frozen +| <<zkn>> +| v1.0.0 +| Frozen +| <<zk>> +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sha256sum1, reftext="SHA2-256 Sum1 instruction"] +==== sha256sum1 + +Synopsis:: +Implements the Sum1 transformation function as used in +the SHA2-256 hash function cite:[nist:fips:180:4] (Section 4.1.2). + +Mnemonic:: +sha256sum1 rd, rs1 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x13}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x1}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 0x1}, +{bits: 5, name: 0x8}, +{bits: 2, name: 0x0}, +]} +.... + +Description:: +This instruction is supported for both RV32 and RV64 base architectures. +For RV32, the entire `XLEN` source register is operated on. +For RV64, the low `32` bits of the source register are operated on, and the +result sign extended to `XLEN` bits. +Though named for SHA2-256, the instruction works for both the +SHA2-224 and SHA2-256 parameterisations as described in +cite:[nist:fips:180:4]. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. 
+ +Operation:: +[source,sail] +-- +function clause execute (SHA256SUM1(rs1,rd)) = { + let inb : bits(32) = X(rs1)[31..0]; + let result : bits(32) = ror32(inb, 6) ^ ror32(inb, 11) ^ ror32(inb, 25); + X(rd) = EXTS(result); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> +| v1.0.0 +| Frozen +| <<zkn>> +| v1.0.0 +| Frozen +| <<zk>> +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sha512sig0h, reftext="SHA2-512 Sigma0 high (RV32)"] +==== sha512sig0h + +Synopsis:: +Implements the _high half_ of the Sigma0 transformation, as +used in the SHA2-512 hash function cite:[nist:fips:180:4] (Section 4.1.3). + +Mnemonic:: +sha512sig0h rd, rs1, rs2 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x33}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x0}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 'rs2'}, +{bits: 5, name: 0xe}, +{bits: 2, name: 0x1}, +]} +.... + +Description:: +This instruction is implemented on RV32 only. +Used to compute the Sigma0 transform of the SHA2-512 hash function +in conjunction with the <<insns-sha512sig0l,`sha512sig0l`>> instruction. +The transform is a 64-bit to 64-bit function, so the input and output +are each represented by two 32-bit registers. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. 
+ +[TIP] +.Note to software developers +==== +The entire Sigma0 transform for SHA2-512 may be computed on RV32 +using the following instruction sequence: + + sha512sig0l t0, a0, a1 + sha512sig0h t1, a1, a0 + +==== + +Operation:: +[source,sail] +-- +function clause execute (SHA512SIG0H(rs2, rs1, rd)) = { + X(rd) = EXTS((X(rs1) >> 1) ^ (X(rs1) >> 7) ^ (X(rs1) >> 8) ^ + (X(rs2) << 31) ^ (X(rs2) << 24) ); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> (RV32) +| v1.0.0 +| Frozen +| <<zkn>> (RV32) +| v1.0.0 +| Frozen +| <<zk>> (RV32) +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sha512sig0l, reftext="SHA2-512 Sigma0 low (RV32)"] +==== sha512sig0l + +Synopsis:: +Implements the _low half_ of the Sigma0 transformation, as +used in the SHA2-512 hash function cite:[nist:fips:180:4] (Section 4.1.3). + +Mnemonic:: +sha512sig0l rd, rs1, rs2 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x33}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x0}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 'rs2'}, +{bits: 5, name: 0xa}, +{bits: 2, name: 0x1}, +]} +.... + +Description:: +This instruction is implemented on RV32 only. +Used to compute the Sigma0 transform of the SHA2-512 hash function +in conjunction with the <<insns-sha512sig0h,`sha512sig0h`>> instruction. +The transform is a 64-bit to 64-bit function, so the input and output +are each represented by two 32-bit registers. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. 
+ +[TIP] +.Note to software developers +==== +The entire Sigma0 transform for SHA2-512 may be computed on RV32 +using the following instruction sequence: + + sha512sig0l t0, a0, a1 + sha512sig0h t1, a1, a0 + +==== + +Operation:: +[source,sail] +-- +function clause execute (SHA512SIG0L(rs2, rs1, rd)) = { + X(rd) = EXTS((X(rs1) >> 1) ^ (X(rs1) >> 7) ^ (X(rs1) >> 8) ^ + (X(rs2) << 31) ^ (X(rs2) << 25) ^ (X(rs2) << 24) ); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> (RV32) +| v1.0.0 +| Frozen +| <<zkn>> (RV32) +| v1.0.0 +| Frozen +| <<zk>> (RV32) +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sha512sig1h, reftext="SHA2-512 Sigma1 high (RV32)"] +==== sha512sig1h + +Synopsis:: +Implements the _high half_ of the Sigma1 transformation, as +used in the SHA2-512 hash function cite:[nist:fips:180:4] (Section 4.1.3). + +Mnemonic:: +sha512sig1h rd, rs1, rs2 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x33}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x0}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 'rs2'}, +{bits: 5, name: 0xf}, +{bits: 2, name: 0x1}, +]} +.... + +Description:: +This instruction is implemented on RV32 only. +Used to compute the Sigma1 transform of the SHA2-512 hash function +in conjunction with the <<insns-sha512sig1l,`sha512sig1l`>> instruction. +The transform is a 64-bit to 64-bit function, so the input and output +are each represented by two 32-bit registers. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. 
+ +[TIP] +.Note to software developers +==== +The entire Sigma1 transform for SHA2-512 may be computed on RV32 +using the following instruction sequence: + + sha512sig1l t0, a0, a1 + sha512sig1h t1, a1, a0 + +==== + +Operation:: +[source,sail] +-- +function clause execute (SHA512SIG1H(rs2, rs1, rd)) = { + X(rd) = EXTS((X(rs1) << 3) ^ (X(rs1) >> 6) ^ (X(rs1) >> 19) ^ + (X(rs2) >> 29) ^ (X(rs2) << 13) ); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> (RV32) +| v1.0.0 +| Frozen +| <<zkn>> (RV32) +| v1.0.0 +| Frozen +| <<zk>> (RV32) +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sha512sig1l, reftext="SHA2-512 Sigma1 low (RV32)"] +==== sha512sig1l + +Synopsis:: +Implements the _low half_ of the Sigma1 transformation, as +used in the SHA2-512 hash function cite:[nist:fips:180:4] (Section 4.1.3). + +Mnemonic:: +sha512sig1l rd, rs1, rs2 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x33}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x0}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 'rs2'}, +{bits: 5, name: 0xb}, +{bits: 2, name: 0x1}, +]} +.... + +Description:: +This instruction is implemented on RV32 only. +Used to compute the Sigma1 transform of the SHA2-512 hash function +in conjunction with the <<insns-sha512sig1h,`sha512sig1h`>> instruction. +The transform is a 64-bit to 64-bit function, so the input and output +are each represented by two 32-bit registers. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. 
+ +[TIP] +.Note to software developers +==== +The entire Sigma1 transform for SHA2-512 may be computed on RV32 +using the following instruction sequence: + + sha512sig1l t0, a0, a1 + sha512sig1h t1, a1, a0 + +==== + +Operation:: +[source,sail] +-- +function clause execute (SHA512SIG1L(rs2, rs1, rd)) = { + X(rd) = EXTS((X(rs1) << 3) ^ (X(rs1) >> 6) ^ (X(rs1) >> 19) ^ + (X(rs2) >> 29) ^ (X(rs2) << 26) ^ (X(rs2) << 13) ); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> (RV32) +| v1.0.0 +| Frozen +| <<zkn>> (RV32) +| v1.0.0 +| Frozen +| <<zk>> (RV32) +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sha512sum0r, reftext="SHA2-512 Sum0 (RV32)"] +==== sha512sum0r + +Synopsis:: +Implements the Sum0 transformation, as +used in the SHA2-512 hash function cite:[nist:fips:180:4] (Section 4.1.3). + +Mnemonic:: +sha512sum0r rd, rs1, rs2 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x33}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x0}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 'rs2'}, +{bits: 5, name: 0x8}, +{bits: 2, name: 0x1}, +]} +.... + +Description:: +This instruction is implemented on RV32 only. +Used to compute the Sum0 transform of the SHA2-512 hash function. +The transform is a 64-bit to 64-bit function, so the input and output +are each represented by two 32-bit registers. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. + +[TIP] +.Note to software developers +==== +The entire Sum0 transform for SHA2-512 may be computed on RV32 +using the following instruction sequence: + + sha512sum0r t0, a0, a1 + sha512sum0r t1, a1, a0 + +Note the reversed source register ordering. 
+==== + +Operation:: +[source,sail] +-- +function clause execute (SHA512SUM0R(rs2, rs1, rd)) = { + X(rd) = EXTS((X(rs1) << 25) ^ (X(rs1) << 30) ^ (X(rs1) >> 28) ^ + (X(rs2) >> 7) ^ (X(rs2) >> 2) ^ (X(rs2) << 4) ); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> (RV32) +| v1.0.0 +| Frozen +| <<zkn>> (RV32) +| v1.0.0 +| Frozen +| <<zk>> (RV32) +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sha512sum1r, reftext="SHA2-512 Sum1 (RV32)"] +==== sha512sum1r + +Synopsis:: +Implements the Sum1 transformation, as +used in the SHA2-512 hash function cite:[nist:fips:180:4] (Section 4.1.3). + +Mnemonic:: +sha512sum1r rd, rs1, rs2 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x33}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x0}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 'rs2'}, +{bits: 5, name: 0x9}, +{bits: 2, name: 0x1}, +]} +.... + +Description:: +This instruction is implemented on RV32 only. +Used to compute the Sum1 transform of the SHA2-512 hash function. +The transform is a 64-bit to 64-bit function, so the input and output +are each represented by two 32-bit registers. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. + +[TIP] +.Note to software developers +==== +The entire Sum1 transform for SHA2-512 may be computed on RV32 +using the following instruction sequence: + + sha512sum1r t0, a0, a1 + sha512sum1r t1, a1, a0 + +Note the reversed source register ordering. 
+==== + +Operation:: +[source,sail] +-- +function clause execute (SHA512SUM1R(rs2, rs1, rd)) = { + X(rd) = EXTS((X(rs1) << 23) ^ (X(rs1) >> 14) ^ (X(rs1) >> 18) ^ + (X(rs2) >> 9) ^ (X(rs2) << 18) ^ (X(rs2) << 14) ); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> (RV32) +| v1.0.0 +| Frozen +| <<zkn>> (RV32) +| v1.0.0 +| Frozen +| <<zk>> (RV32) +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sha512sig0, reftext="SHA2-512 Sigma0 instruction (RV64)"] +==== sha512sig0 + +Synopsis:: +Implements the Sigma0 transformation function as used in +the SHA2-512 hash function cite:[nist:fips:180:4] (Section 4.1.3). + +Mnemonic:: +sha512sig0 rd, rs1 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x13}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x1}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 0x6}, +{bits: 5, name: 0x8}, +{bits: 2, name: 0x0}, +]} +.... + +Description:: +This instruction is supported for the RV64 base architecture. +It implements the Sigma0 transform of the SHA2-512 hash function. +cite:[nist:fips:180:4]. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. + +Operation:: +[source,sail] +-- +function clause execute (SHA512SIG0(rs1, rd)) = { + X(rd) = ror64(X(rs1), 1) ^ ror64(X(rs1), 8) ^ (X(rs1) >> 7); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> (RV64) +| v1.0.0 +| Frozen +| <<zkn>> (RV64) +| v1.0.0 +| Frozen +| <<zk>> (RV64) +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sha512sig1, reftext="SHA2-512 Sigma1 instruction (RV64)"] +==== sha512sig1 + +Synopsis:: +Implements the Sigma1 transformation function as used in +the SHA2-512 hash function cite:[nist:fips:180:4] (Section 4.1.3). + +Mnemonic:: +sha512sig1 rd, rs1 + +Encoding:: +[wavedrom, , svg] +.... 
+{reg:[ +{bits: 7, name: 0x13}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x1}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 0x7}, +{bits: 5, name: 0x8}, +{bits: 2, name: 0x0}, +]} +.... + +Description:: +This instruction is supported for the RV64 base architecture. +It implements the Sigma1 transform of the SHA2-512 hash function +cite:[nist:fips:180:4]. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. + +Operation:: +[source,sail] +-- +function clause execute (SHA512SIG1(rs1, rd)) = { + X(rd) = ror64(X(rs1), 19) ^ ror64(X(rs1), 61) ^ (X(rs1) >> 6); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> (RV64) +| v1.0.0 +| Frozen +| <<zkn>> (RV64) +| v1.0.0 +| Frozen +| <<zk>> (RV64) +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sha512sum0, reftext="SHA2-512 Sum0 instruction (RV64)"] +==== sha512sum0 + +Synopsis:: +Implements the Sum0 transformation function as used in +the SHA2-512 hash function cite:[nist:fips:180:4] (Section 4.1.3). + +Mnemonic:: +sha512sum0 rd, rs1 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x13}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x1}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 0x4}, +{bits: 5, name: 0x8}, +{bits: 2, name: 0x0}, +]} +.... + +Description:: +This instruction is supported for the RV64 base architecture. +It implements the Sum0 transform of the SHA2-512 hash function +cite:[nist:fips:180:4]. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on.
+ +Operation:: +[source,sail] +-- +function clause execute (SHA512SUM0(rs1, rd)) = { + X(rd) = ror64(X(rs1), 28) ^ ror64(X(rs1), 34) ^ ror64(X(rs1) ,39); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> (RV64) +| v1.0.0 +| Frozen +| <<zkn>> (RV64) +| v1.0.0 +| Frozen +| <<zk>> (RV64) +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sha512sum1, reftext="SHA2-512 Sum1 instruction (RV64)"] +==== sha512sum1 + +Synopsis:: +Implements the Sum1 transformation function as used in +the SHA2-512 hash function cite:[nist:fips:180:4] (Section 4.1.3). + +Mnemonic:: +sha512sum1 rd, rs1 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x13}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x1}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 0x5}, +{bits: 5, name: 0x8}, +{bits: 2, name: 0x0}, +]} +.... + +Description:: +This instruction is supported for the RV64 base architecture. +It implements the Sum1 transform of the SHA2-512 hash function +cite:[nist:fips:180:4]. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. + +Operation:: +[source,sail] +-- +function clause execute (SHA512SUM1(rs1, rd)) = { + X(rd) = ror64(X(rs1), 14) ^ ror64(X(rs1), 18) ^ ror64(X(rs1) ,41); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zknh>> (RV64) +| v1.0.0 +| Frozen +| <<zkn>> (RV64) +| v1.0.0 +| Frozen +| <<zk>> (RV64) +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sm3p0, reftext="SM3 P0 transform"] +==== sm3p0 + +Synopsis:: +Implements the _P0_ transformation function as used in +the SM3 hash function cite:[gbt:sm3,iso:sm3]. + +Mnemonic:: +sm3p0 rd, rs1 + +Encoding:: +[wavedrom, , svg] +....
+{reg:[ +{bits: 7, name: 0x13}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x1}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 0x8}, +{bits: 5, name: 0x8}, +{bits: 2, name: 0x0}, +]} +.... + +Description:: +This instruction is supported for the RV32 and RV64 base architectures. +It implements the _P0_ transform of the SM3 hash function cite:[gbt:sm3,iso:sm3]. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. + +.Supporting Material +[NOTE] +==== +This instruction is based on work done in cite:[MJS:LWSHA:20]. +==== + +Operation:: +[source,sail] +-- +function clause execute (SM3P0(rs1, rd)) = { + let r1 : bits(32) = X(rs1)[31..0]; + let result : bits(32) = r1 ^ rol32(r1, 9) ^ rol32(r1, 17); + X(rd) = EXTS(result); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zksh>> +| v1.0.0 +| Frozen +| <<zks>> +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sm3p1, reftext="SM3 P1 transform"] +==== sm3p1 + +Synopsis:: +Implements the _P1_ transformation function as used in +the SM3 hash function cite:[gbt:sm3,iso:sm3]. + +Mnemonic:: +sm3p1 rd, rs1 + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x13}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x1}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 0x9}, +{bits: 5, name: 0x8}, +{bits: 2, name: 0x0}, +]} +.... + +Description:: +This instruction is supported for the RV32 and RV64 base architectures. +It implements the _P1_ transform of the SM3 hash function cite:[gbt:sm3,iso:sm3]. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. + +.Supporting Material +[NOTE] +==== +This instruction is based on work done in cite:[MJS:LWSHA:20]. 
+==== + +Operation:: +[source,sail] +-- +function clause execute (SM3P1(rs1, rd)) = { + let r1 : bits(32) = X(rs1)[31..0]; + let result : bits(32) = r1 ^ rol32(r1, 15) ^ rol32(r1, 23); + X(rd) = EXTS(result); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zksh>> +| v1.0.0 +| Frozen +| <<zks>> +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sm4ed, reftext="SM4 Encrypt/Decrypt Instruction"] +==== sm4ed + +Synopsis:: +Accelerates the block encrypt/decrypt operation of the SM4 block cipher +cite:[gbt:sm4, iso:sm4]. + +Mnemonic:: +sm4ed rd, rs1, rs2, bs + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x33}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x0}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 'rs2'}, +{bits: 5, name: 0x18}, +{bits: 2, name: 'bs'}, +]} +.... + +Description:: +Implements a T-tables in hardware style approach to accelerating the +SM4 round function. +A byte is extracted from `rs2` based on `bs`, to which the SBox and +linear layer transforms are applied, before the result is XOR'd with +`rs1` and written back to `rd`. +This instruction exists on RV32 and RV64 base architectures. +On RV64, the 32-bit result is sign extended to XLEN bits. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. 
+ +Operation:: +[source,sail] +-- +function clause execute (SM4ED (bs,rs2,rs1,rd)) = { + let shamt : bits(5) = bs @ 0b000; /* shamt = bs*8 */ + let sb_in : bits(8) = (X(rs2)[31..0] >> shamt)[7..0]; + let x : bits(32) = 0x000000 @ sm4_sbox(sb_in); + let y : bits(32) = x ^ (x << 8) ^ ( x << 2) ^ + (x << 18) ^ ((x & 0x0000003F) << 26) ^ + ((x & 0x000000C0) << 10); + let z : bits(32) = rol32(y, unsigned(shamt)); + let result: bits(32) = z ^ X(rs1)[31..0]; + X(rd) = EXTS(result); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zksed>> +| v1.0.0 +| Frozen +| <<zks>> +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-sm4ks, reftext="SM4 Key Schedule Instruction"] +==== sm4ks + +Synopsis:: +Accelerates the Key Schedule operation of the SM4 block cipher +cite:[gbt:sm4, iso:sm4]. + +Mnemonic:: +sm4ks rd, rs1, rs2, bs + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x33}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x0}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 'rs2'}, +{bits: 5, name: 0x1a}, +{bits: 2, name: 'bs'}, +]} +.... + +Description:: +Implements a T-tables in hardware style approach to accelerating the +SM4 Key Schedule. +A byte is extracted from `rs2` based on `bs`, to which the SBox and +linear layer transforms are applied, before the result is XOR'd with +`rs1` and written back to `rd`. +This instruction exists on RV32 and RV64 base architectures. +On RV64, the 32-bit result is sign extended to XLEN bits. +This instruction must _always_ be implemented such that its execution +latency does not depend on the data being operated on. 
+ +Operation:: +[source,sail] +-- +function clause execute (SM4KS (bs,rs2,rs1,rd)) = { + let shamt : bits(5) = (bs @ 0b000); /* shamt = bs*8 */ + let sb_in : bits(8) = (X(rs2)[31..0] >> shamt)[7..0]; + let x : bits(32) = 0x000000 @ sm4_sbox(sb_in); + let y : bits(32) = x ^ ((x & 0x00000007) << 29) ^ ((x & 0x000000FE) << 7) ^ + ((x & 0x00000001) << 23) ^ ((x & 0x000000F8) << 13) ; + let z : bits(32) = rol32(y, unsigned(shamt)); + let result: bits(32) = z ^ X(rs1)[31..0]; + X(rd) = EXTS(result); + RETIRE_SUCCESS +} +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +| <<zksed>> +| v1.0.0 +| Frozen +| <<zks>> +| v1.0.0 +| Frozen +|=== + +<<< + +[#insns-unzip,reftext="Bit deinterleave"] +==== unzip + +Synopsis:: +Implements the inverse of the zip instruction. + +Mnemonic:: +unzip _rd_, _rs_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 2, name: 0x3}, +{bits: 5, name: 0x4}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x5}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 0x1f}, +{bits: 7, name: 0x4}, +]} +.... + +Description:: +This instruction places the odd and even bits of the source word into +the high and low halves of the destination word, respectively. +It is the inverse of the <<insns-zip,zip>> instruction. +This instruction is available only on RV32. + +Operation:: +[source,sail] +-- +foreach (i from 0 to xlen/2-1) { + X(rd)[i] = X(rs1)[2*i] + X(rd)[i+xlen/2] = X(rs1)[2*i+1] +} +-- + +.Software Hint +[NOTE, caption="SH" ] +=============================================================== +This instruction is useful for implementing the SHA3 cryptographic +hash function on a 32-bit architecture, as it implements the +bit-interleaving operation used to speed up the 64-bit rotations +directly.
+=============================================================== + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbkb (<<#zbkb>>) (RV32) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-xnor,reftext="Exclusive NOR"] +==== xnor + +Synopsis:: +Exclusive NOR + +Mnemonic:: +xnor _rd_, _rs1_, _rs2_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ + { bits: 7, name: 0x33, attr: ['OP'] }, + { bits: 5, name: 'rd' }, + { bits: 3, name: 0x4, attr: ['XNOR']}, + { bits: 5, name: 'rs1' }, + { bits: 5, name: 'rs2' }, + { bits: 7, name: 0x20, attr: ['XNOR'] }, +]} +.... + +Description:: +This instruction performs the bit-wise exclusive-NOR operation on _rs1_ and _rs2_. + +Operation:: +[source,sail] +-- +X(rd) = ~(X(rs1) ^ X(rs2)); +-- + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbb (<<#zbb>>) +|v1.0.0 +|Frozen + +|Zbkb (<<#zbkb>>) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[#insns-zip,reftext="Bit interleave"] +==== zip + +Synopsis:: +Interleave the upper and lower halves of the source word into the odd and even +bit positions of the destination. + +Mnemonic:: +zip _rd_, _rs_ + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 2, name: 0x3}, +{bits: 5, name: 0x4}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x1}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 0x1e}, +{bits: 7, name: 0x4}, +]} +.... + +Description:: +This instruction interleaves the bits of the high and low halves of the source word into +the odd and even bit positions of the destination word, respectively. +It is the inverse of the <<insns-unzip,unzip>> instruction. +This instruction is available only on RV32.
+ +Operation:: +[source,sail] +-- +foreach (i from 0 to xlen/2-1) { + X(rd)[2*i] = X(rs1)[i] + X(rd)[2*i+1] = X(rs1)[i+xlen/2] +} +-- + +.Software Hint +[NOTE, caption="SH" ] +=============================================================== +This instruction is useful for implementing the SHA3 cryptographic +hash function on a 32-bit architecture, as it implements the +bit-interleaving operation used to speed up the 64-bit rotations +directly. +=============================================================== + +Included in:: +[%header,cols="4,2,2"] +|=== +|Extension +|Minimum version +|Lifecycle state + +|Zbkb (<<#zbkb>>) (RV32) +|v1.0.0-rc4 +|Frozen +|=== + +<<< + +[[crypto_scalar_es]] +=== Entropy Source + +The `seed` CSR provides an interface to a NIST SP 800-90B cite:[TuBaKe:18] +or BSI AIS-31 cite:[KiSc11] compliant physical Entropy Source (ES). + +An entropy source, by itself, is not a cryptographically secure Random +Bit Generator (RBG), but can be used to build standard (and nonstandard) +RBGs of many types with the help of symmetric cryptography. Expected usage +is to condition (typically with SHA-2/3) the output from an entropy source and +use it to seed a cryptographically secure Deterministic Random Bit Generator +(DRBG) such as AES-based `CTR_DRBG` cite:[BaKe15]. +The combination of an Entropy Source, Conditioning, and a DRBG can be used +to create random bits securely cite:[BaKeRo:21]. +See <<crypto_scalar_appx_es>> for a non-normative description of +certification and self-certification procedures, design rationale, and more +detailed suggestions on how the entropy source output can be used. + +[[crypto_scalar_seed_csr]] +==== The `seed` CSR + +`seed` is an unprivileged CSR located at address `0x015`.
+The 32-bit contents of `seed` are as follows: + +[%autowidth.stretch,cols="^,^,<",options="header",] +|======================================================================= +|Bits |Name |Description + +|`31:30` |`OPST` |Status: `BIST` (00), `WAIT` (01), `ES16` (10), `DEAD` +(11). + +|`29:24` |_reserved_ |For future use by the RISC-V specification. + +|`23:16` |_custom_ |Designated for custom and experimental use. + +|`15: 0` |`entropy` |16 bits of randomness, only when `OPST=ES16`. +|======================================================================= + +The `seed` CSR must be accessed with a read-write instruction. A read-only +instruction such as `CSRRS/CSRRC` with `rs1=x0` or `CSRRSI/CSRRCI` with +`uimm=0` will raise an illegal instruction exception. +The write value (in `rs1` or `uimm`) must be ignored by implementations. +The purpose of the write is to signal polling and flushing. + +The instruction `csrrw rd, seed, x0` can be used for fetching seed status +and entropy values. It is available on both RV32 and RV64 base architectures +and will zero-extend the 32-bit word to XLEN bits. + +Encoding:: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 0x73, attr: "SYSTEM"}, +{bits: 5, name: 'rd'}, +{bits: 3, name: 0x1, attr: "CSRRW"}, +{bits: 5, name: 0x0, attr: "x0"}, +{bits: 12, name: 0x15, attr: "seed = 0x015"}, +]} +.... + +The `seed` CSR is also access controlled by execution mode, and attempted +read or write access will raise an illegal instruction exception outside M mode +unless access is explicitly granted. See <<crypto_scalar_es_access>> for +more details. + +The status bits `seed[31:30]` = `OPST` may be `ES16` (10), +indicating successful polling, or one of three entropy polling failure +statuses `BIST` (00), `WAIT` (01), or `DEAD` (11), discussed below. 
+ +Each returned `seed[15:0]` = `entropy` value represents unique randomness +when `OPST`=`ES16` (`seed[31:30]` = `10`), even if its numerical value is +the same as that of a previously polled `entropy` value. The implementation +requirements of `entropy` bits are defined in <<crypto_scalar_es_req>>. +When `OPST` is not `ES16`, `entropy` must be set to 0. +An implementation may safely set reserved and custom bits to zeros. + +For security reasons, the interface guarantees that secret `entropy` +words are not made available multiple times. Hence polling (reading) must +also have the side effect of clearing (wipe-on-read) the `entropy` contents and +changing the state to `WAIT` (unless there is `entropy` +immediately available for `ES16`). Other states (`BIST`, `WAIT`, and `DEAD`) +may be unaffected by polling. + +The Status Bits returned in `seed[31:30]`=`OPST`: + +* `00` - `BIST` +indicates that Built-In Self-Test "on-demand" (BIST) testing is being +performed. If `OPST` returns temporarily to `BIST` from any other +state, this signals a non-fatal self-test alarm, +which is non-actionable, apart from being logged. +Such a `BIST` alarm must be latched until polled at least once to enable +software to record its occurrence. + +* `01` - `WAIT` +means that a sufficient amount of entropy is not yet available. This +is not an error condition and may (in fact) be more frequent than ES16 +since physical entropy sources often have low bandwidth. + +* `10` - `ES16` +indicates success; the low bits `seed[15:0]` will have 16 bits of +randomness (`entropy`), which is guaranteed to meet certain minimum entropy +requirements, regardless of implementation. + +* `11` - `DEAD` +is an unrecoverable self-test error. This may indicate a hardware +fault, a security issue, or (extremely rarely) a type-1 statistical +false positive in the continuous testing procedures. In case of a fatal +failure, an immediate lockdown may also be an appropriate response in +dedicated security devices. 
+ +**Example.** `0x8000ABCD` is a valid `ES16` status output, with `0xABCD` +being the `entropy` value. `0xFFFFFFFF` is an invalid output (`DEAD`) with +no `entropy` value. + +[[crypto_scalar_es_state,reftext="Entropy Source State Transition Diagram"]] +==== +image::es_state.svg[title="Entropy Source state transition diagram.", align="center",scaledwidth=40%] +Normally the operational state alternates between WAIT +(no data) and ES16, which means that 16 bits of randomness (`entropy`) +have been polled. BIST (Built-in Self-Test) only occurs after reset +or to signal a non-fatal self-test alarm (if reached after WAIT or +ES16). DEAD is an unrecoverable error state. +==== + +[[crypto_scalar_es_req]] +==== Entropy Source Requirements + +The output `entropy` (`seed[15:0]` in ES16 state) is not necessarily +fully conditioned randomness due to hardware and energy limitations +of smaller, low-powered implementations. However, minimum requirements are +defined. The main requirement is that 2-to-1 cryptographic post-processing +in 256-bit input blocks will yield 128-bit "full entropy" output blocks. +Entropy source users may make this conservative assumption but are not +prohibited from using more than twice the number of seed bits relative +to the desired resulting entropy. + +An implementation of the entropy source should meet at least one of the +following requirements sets in order to be considered a secure and +safe design: + +* <<crypto_scalar_es_req_90b>>: A physical entropy source meeting + NIST SP 800-90B cite:[TuBaKe:18] criteria with evaluated min-entropy + of 192 bits for each 256 output bits (min-entropy rate 0.75). + +* <<crypto_scalar_es_req_ptg2>>: A physical entropy source meeting the + AIS-31 PTG.2 cite:[KiSc11] criteria, implying average Shannon entropy + rate 0.997. The source must also meet the NIST 800-90B + min-entropy rate 192/256 = 0.75. + +* <<crypto_scalar_es_req_virt>>: A virtual entropy source is a DRBG + seeded from a physical entropy source. 
It must have at least a + 256-bit (Post-Quantum Category 5) internal security level. + +All implementations must signal initialization, test mode, and health +alarms as required by respective standards. This may require the implementer +to add non-standard (custom) test interfaces in a secure and safe manner, +an example of which is described in <<crypto_scalar_es_getnoise>>. + + +[[crypto_scalar_es_req_90b]] +===== NIST SP 800-90B / FIPS 140-3 Requirements + +All NIST SP 800-90B cite:[TuBaKe:18] required components and health test +mechanisms must be implemented. + +The entropy requirement is satisfied if 128 bits of _full entropy_ can be +obtained from each 256-bit (16*16 -bit) successful, but possibly +non-consecutive `entropy` (ES16) output sequence using a vetted conditioning +algorithm such as a cryptographic hash (See Section 3.1.5.1.1, SP 800-90B +cite:[TuBaKe:18]). In practice, a min-entropy rate of 0.75 or larger is +required for this. + +Note that 128 bits of estimated input min-entropy does not yield 128 bits of +conditioned, full entropy in SP 800-90B/C evaluation. Instead, the +implication is that every 256-bit sequence should have min-entropy of at +least 128+64 = 192 bits, as discussed in SP 800-90C cite:[BaKeRo:21]; +the likelihood of successfully "guessing" an individual 256-bit output +sequence should not be higher than 2^-192^ even with an (almost) +unconstrained amount of entropy source data and computational power. + +Rather than attempting to define all the mathematical and architectural +properties that the entropy source must satisfy, we define that the physical +entropy source be strong and robust enough to pass the equivalent of +NIST SP 800-90 evaluation and certification for full entropy when +conditioned cryptographically in ratio 2:1 with 128-bit output blocks. + +Even though the requirement is defined in terms of 128-bit full entropy +blocks, we recommend 256-bit security.
This can be accomplished by using +at least 512 `entropy` bits to initialize a DRBG that has 256-bit security. + +[[crypto_scalar_es_req_ptg2]] +===== BSI AIS-31 PTG.2 / Common Criteria Requirements + +For alternative Common Criteria certification (or self-certification), +AIS-31 PTG.2 class cite:[KiSc11] (Sect. 4.3.) required hardware components +and mechanisms must be implemented. +In addition to AIS-31 PTG.2 randomness requirements (Shannon entropy rate of +0.997 as evaluated in that standard), the overall min-entropy requirement +remains, as discussed in <<crypto_scalar_es_req_90b>>. Note that 800-90B +min-entropy can be significantly lower than AIS-31 Shannon entropy. These +two metrics should not be equated or confused with each other. + + +[[crypto_scalar_es_req_virt]] +===== Virtual Sources: Security Requirement + +NOTE: A virtual source is not an ISA compliance requirement. It is defined +for the benefit of the RISC-V security ecosystem so that virtual systems +may have a consistent level of security. + +A virtual source is not a physical entropy source but provides +additional protection against covert channels, depletion attacks, and host +identification in operating environments that can not be entirely trusted +with direct access to a hardware resource. Despite limited trust, +implementors should try to guarantee that even such environments have +sufficient entropy available for secure cryptographic operations. + +A virtual source traps access to the `seed` CSR, emulates it, or +otherwise implements it, possibly without direct access to a physical entropy +source. The output can be cryptographically secure pseudorandomness +instead of real entropy, but must have at least 256-bit security, as defined +below. A virtual source is intended especially for guest operating +systems, sandboxes, emulators, and similar use cases.
+ +As a technical definition, a random-distinguishing attack against +the output should require computational resources comparable to or greater +than those required for exhaustive key search on a secure block cipher +with a 256-bit key (e.g., AES 256). This applies to both classical +and quantum computing models, but only classical information flows. +The virtual source security requirement maps to Post-Quantum Security +Category 5 cite:[NI16]. + +Any implementation of the `seed` CSR that limits the security +strength shall not reduce it to less than 256 bits. If the security +level is under 256 bits, then the interface must not be available. + +A virtual entropy source does not need to implement `WAIT` or `BIST` states. +It should fail (`DEAD`) if the host DRBG or entropy source fails and +there is insufficient seeding material for the host DRBG. + + +[[crypto_scalar_es_access]] +==== Access Control to `seed` + +The `seed` CSR is by default only available in M mode, but can be made +available to other modes via the `mseccfg.sseed` and `mseccfg.useed` +access control bits. `sseed` is bit `9` and `useed` is +bit `8` of the `mseccfg` CSR. +Without the corresponding access control bit set to 1, any attempted +access to `seed` from U, S, or HS modes will raise an illegal instruction +exception. + +VS and VU modes are present in systems with the Hypervisor (H) extension +implemented. If desired, a hypervisor can emulate accesses to the seed CSR +from a virtual machine. Attempted access to `seed` from virtual modes +VS and VU always raises an exception; a read-only instruction causes an +illegal instruction exception, while a read-write instruction (that can +potentially be emulated) causes a virtual instruction exception only if +`mseccfg.sseed=1`. Note that `mseccfg.useed` has no effect on the exception +type for either VS or VU modes. + +.Entropy Source Access Control.
+ +[cols="1,1,1,7",options="header",] +|======================================================================= +|Mode | `sseed` | `useed` | Description + +| M +| `*` +| `*` +| The `seed` CSR is always available in machine mode as normal (with a +CSR read-write instruction.) Attempted read without a write raises an +illegal instruction exception regardless of mode and access control bits. + +| U +| `*` +| `0` +| Any `seed` CSR access raises an illegal instruction exception. + +| U +| `*` +| `1` +| The `seed` CSR is accessible as normal. No exception is raised for read-write. + +| S/HS +| `0` +| `*` +| Any `seed` CSR access raises an illegal instruction exception. + + +| S/HS +| `1` +| `*` +| The `seed` CSR is accessible as normal. No exception is raised for read-write. + +| VS/VU +| `0` +| `*` +| Any `seed` CSR access raises an illegal instruction exception. + +| VS/VU +| `1` +| `*` +| A read-write `seed` access raises a virtual instruction exception, +while other access conditions raise an illegal instruction exception. + +|======================================================================= + + +Systems should implement carefully considered access control policies from +lower privilege modes to physical entropy sources. The system can trap +attempted access to `seed` and feed a less privileged client +_virtual entropy source_ data (<<crypto_scalar_es_req_virt>>) instead of +invoking an SP 800-90B (<<crypto_scalar_es_req_90b>>) or PTG.2 +(<<crypto_scalar_es_req_ptg2>>) _physical entropy source_. Emulated `seed` +data generation is made with an appropriately seeded, secure software DRBG. +See <<crypto_scalar_appx_es_access>> for security considerations related +to direct access to entropy sources. + +Implementations may implement `mseccfg` such that `[s,u]seed` is a read-only +constant value `0`. Software may discover if access to the `seed` CSR can be +enabled in U and S mode by writing a `1` to `[s,u]seed` and reading back +the result. 
+ +If S or U mode is not implemented, then the corresponding `[s,u]seed` +bits of `mseccfg` must be hardwired to zero. +The `[s,u]seed` bits must have a defined reset value. The system +must not allow them to be in an undefined state after a reset. +`mseccfg` exists if `Zkr` is implemented, or if it is required by other +processor features. If `Zkr` is _not_ implemented, the `[s,u]seed` bits must +be hardwired to zero. + |