aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Gouriou <ego@rivosinc.com>2023-06-01 18:06:55 -0700
committerEric Gouriou <ego@rivosinc.com>2023-06-19 14:30:32 -0700
commite87038ee5e6545a5149cdf4334d220f951534f30 (patch)
treeb2224a87df4346e9f5b0909058a08e4f9be2aeef
parentd5c0339484323b5a9498576d70ec90eab2e13438 (diff)
downloadriscv-isa-sim-e87038ee5e6545a5149cdf4334d220f951534f30.zip
riscv-isa-sim-e87038ee5e6545a5149cdf4334d220f951534f30.tar.gz
riscv-isa-sim-e87038ee5e6545a5149cdf4334d220f951534f30.tar.bz2
Zvk: Implement Zvbb, Vector Bit-manipulation for Cryptography
Implement the proposed instructions in Zvbb: - vandn.{vv,vx}, vector bitwise and-not - vbrev.v, vector bit reverse in element - vbrev8.v, vector bit reverse in bytes - vrev8.v, vector byte reverse - vctz.v, vector count trailing zeros - vclz.v, vector count leading zeros - vcpop.v, vector population count - vrol.{vv,vx}, vector rotate left - vror.{vi,vv,vx}, vector rotate right - vwsll.{vi,vv,vx}, vector widening shift left logical A new instruction field, 'zimm6', is introduced, encoded in bits [19:15] and bit [26]. It is used by "vror.vi" to encode a shift immediate in [0, 63]. Co-authored-by: Raghav Gupta <rgupta@rivosinc.com> Co-authored-by: Stanislaw Kardach <kda@semihalf.com> Signed-off-by: Eric Gouriou <ego@rivosinc.com>
-rw-r--r--riscv/decode.h1
-rw-r--r--riscv/insns/vandn_vv.h10
-rw-r--r--riscv/insns/vandn_vx.h10
-rw-r--r--riscv/insns/vbrev8_v.h13
-rw-r--r--riscv/insns/vbrev_v.h24
-rw-r--r--riscv/insns/vclz_v.h16
-rw-r--r--riscv/insns/vcpop_v.h16
-rw-r--r--riscv/insns/vctz_v.h16
-rw-r--r--riscv/insns/vrev8_v.h16
-rw-r--r--riscv/insns/vrol_vv.h17
-rw-r--r--riscv/insns/vrol_vx.h18
-rw-r--r--riscv/insns/vror_vi.h18
-rw-r--r--riscv/insns/vror_vv.h17
-rw-r--r--riscv/insns/vror_vx.h18
-rw-r--r--riscv/insns/vwsll_vi.h10
-rw-r--r--riscv/insns/vwsll_vv.h10
-rw-r--r--riscv/insns/vwsll_vx.h10
-rw-r--r--riscv/riscv.mk.in22
18 files changed, 262 insertions, 0 deletions
diff --git a/riscv/decode.h b/riscv/decode.h
index dad32a1..cd1c0a1 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -140,6 +140,7 @@ public:
uint64_t v_vta() { return x(26, 1); }
uint64_t v_vma() { return x(27, 1); }
uint64_t v_mew() { return x(28, 1); }
+ uint64_t v_zimm6() { return x(15, 5) + (x(26, 1) << 5); }
uint64_t p_imm2() { return x(20, 2); }
uint64_t p_imm3() { return x(20, 3); }
diff --git a/riscv/insns/vandn_vv.h b/riscv/insns/vandn_vv.h
new file mode 100644
index 0000000..d85e47d
--- /dev/null
+++ b/riscv/insns/vandn_vv.h
@@ -0,0 +1,10 @@
+// vandn.vv vd, vs2, vs1, vm
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+VI_VV_LOOP
+({
+ vd = vs2 & (~vs1);
+})
diff --git a/riscv/insns/vandn_vx.h b/riscv/insns/vandn_vx.h
new file mode 100644
index 0000000..1c66a40
--- /dev/null
+++ b/riscv/insns/vandn_vx.h
@@ -0,0 +1,10 @@
+// vandn.vx vd, vs2, rs1, vm
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+VI_VX_LOOP
+({
+ vd = vs2 & (~rs1);
+})
diff --git a/riscv/insns/vbrev8_v.h b/riscv/insns/vbrev8_v.h
new file mode 100644
index 0000000..a6d3cda
--- /dev/null
+++ b/riscv/insns/vbrev8_v.h
@@ -0,0 +1,13 @@
+// vbrev8.v vd, vs2, vm
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+VI_V_ULOOP
+({
+ vd = vs2;
+ vd = ((vd & 0x5555555555555555llu) << 1) | ((vd & 0xAAAAAAAAAAAAAAAAllu) >> 1);
+ vd = ((vd & 0x3333333333333333llu) << 2) | ((vd & 0xCCCCCCCCCCCCCCCCllu) >> 2);
+ vd = ((vd & 0x0F0F0F0F0F0F0F0Fllu) << 4) | ((vd & 0xF0F0F0F0F0F0F0F0llu) >> 4);
+})
diff --git a/riscv/insns/vbrev_v.h b/riscv/insns/vbrev_v.h
new file mode 100644
index 0000000..7f784c2
--- /dev/null
+++ b/riscv/insns/vbrev_v.h
@@ -0,0 +1,24 @@
+// vbrev.v vd, vs2
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+VI_V_ULOOP
+({
+ reg_t x = vs2;
+
+ // Reverse bits in bytes (vbrev8)
+ x = ((x & 0x5555555555555555llu) << 1) | ((x & 0xAAAAAAAAAAAAAAAAllu) >> 1);
+ x = ((x & 0x3333333333333333llu) << 2) | ((x & 0xCCCCCCCCCCCCCCCCllu) >> 2);
+ x = ((x & 0x0F0F0F0F0F0F0F0Fllu) << 4) | ((x & 0xF0F0F0F0F0F0F0F0llu) >> 4);
+ // Re-order bytes (vrev8)
+ if (P.VU.vsew > 8)
+ x = ((x & 0x00FF00FF00FF00FFllu) << 8) | ((x & 0xFF00FF00FF00FF00llu) >> 8);
+ if (P.VU.vsew > 16)
+ x = ((x & 0x0000FFFF0000FFFFllu) << 16) | ((x & 0xFFFF0000FFFF0000llu) >> 16);
+ if (P.VU.vsew > 32)
+ x = ((x & 0x00000000FFFFFFFFllu) << 32) | ((x & 0xFFFFFFFF00000000llu) >> 32);
+
+ vd = x;
+})
diff --git a/riscv/insns/vclz_v.h b/riscv/insns/vclz_v.h
new file mode 100644
index 0000000..5f7f03c
--- /dev/null
+++ b/riscv/insns/vclz_v.h
@@ -0,0 +1,16 @@
+// vclz.v vd, vs2
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+VI_V_ULOOP
+({
+ unsigned int i = 0;
+ for (; i < P.VU.vsew; ++i) {
+ if (1 & (vs2 >> (P.VU.vsew - 1 - i))) {
+ break;
+ }
+ }
+ vd = i;
+})
diff --git a/riscv/insns/vcpop_v.h b/riscv/insns/vcpop_v.h
new file mode 100644
index 0000000..52b29c6
--- /dev/null
+++ b/riscv/insns/vcpop_v.h
@@ -0,0 +1,16 @@
+// vcpop.v vd, vs2, vm
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+VI_V_ULOOP
+({
+ reg_t count = 0;
+ for (std::size_t i = 0; i < P.VU.vsew; ++i) {
+ if (1 & (vs2 >> i)) {
+ count++;
+ }
+ }
+ vd = count;
+})
diff --git a/riscv/insns/vctz_v.h b/riscv/insns/vctz_v.h
new file mode 100644
index 0000000..b63dd01
--- /dev/null
+++ b/riscv/insns/vctz_v.h
@@ -0,0 +1,16 @@
+// vctz.v vd, vs2
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+VI_V_ULOOP
+({
+ unsigned int i = 0;
+ for (; i < P.VU.vsew; ++i) {
+ if (1 & (vs2 >> i)) {
+ break;
+ }
+ }
+ vd = i;
+})
diff --git a/riscv/insns/vrev8_v.h b/riscv/insns/vrev8_v.h
new file mode 100644
index 0000000..f26c5a0
--- /dev/null
+++ b/riscv/insns/vrev8_v.h
@@ -0,0 +1,16 @@
+// vrev8.v vd, vs2, vm
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+VI_V_ULOOP
+({
+ vd = vs2;
+ if (P.VU.vsew > 8)
+ vd = ((vd & 0x00FF00FF00FF00FFllu) << 8) | ((vd & 0xFF00FF00FF00FF00llu) >> 8);
+ if (P.VU.vsew > 16)
+ vd = ((vd & 0x0000FFFF0000FFFFllu) << 16) | ((vd & 0xFFFF0000FFFF0000llu) >> 16);
+ if (P.VU.vsew > 32)
+ vd = ((vd & 0x00000000FFFFFFFFllu) << 32) | ((vd & 0xFFFFFFFF00000000llu) >> 32);
+})
diff --git a/riscv/insns/vrol_vv.h b/riscv/insns/vrol_vv.h
new file mode 100644
index 0000000..fb2e483
--- /dev/null
+++ b/riscv/insns/vrol_vv.h
@@ -0,0 +1,17 @@
+// vrol.vv vd, vs2, vs1, vm
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+// 'mask' selects the low log2(vsew) bits of the shift amount,
+// to limit the maximum shift to "vsew - 1" bits.
+const reg_t mask = P.VU.vsew - 1;
+
+VI_VV_ULOOP
+({
+ // For .vv, the shift amount comes from the vs1 element.
+ const reg_t lshift = vs1 & mask;
+ const reg_t rshift = (-lshift) & mask;
+ vd = (vs2 << lshift) | (vs2 >> rshift);
+})
diff --git a/riscv/insns/vrol_vx.h b/riscv/insns/vrol_vx.h
new file mode 100644
index 0000000..b0c89a2
--- /dev/null
+++ b/riscv/insns/vrol_vx.h
@@ -0,0 +1,18 @@
+// vrol.vx vd, vs2, rs1, vm
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+// 'mask' selects the low log2(vsew) bits of the shift amount,
+// to limit the maximum shift to "vsew - 1" bits.
+const reg_t mask = P.VU.vsew - 1;
+
+// For .vx, the shift amount comes from rs1.
+const reg_t lshift = ((reg_t)RS1) & mask;
+const reg_t rshift = (-lshift) & mask;
+
+VI_V_ULOOP
+({
+ vd = (vs2 << lshift) | (vs2 >> rshift);
+})
diff --git a/riscv/insns/vror_vi.h b/riscv/insns/vror_vi.h
new file mode 100644
index 0000000..1269c3d
--- /dev/null
+++ b/riscv/insns/vror_vi.h
@@ -0,0 +1,18 @@
+// vror.vi vd, vs2, zimm6, vm
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+// 'mask' selects the low log2(vsew) bits of the shift amount,
+// to limit the maximum shift to "vsew - 1" bits.
+const reg_t mask = P.VU.vsew - 1;
+
+// For .vi, the shift amount comes from the zimm6 field, bits [19:15] and bit [26].
+const reg_t rshift = insn.v_zimm6() & mask;
+const reg_t lshift = (-rshift) & mask;
+
+VI_V_ULOOP
+({
+ vd = (vs2 << lshift) | (vs2 >> rshift);
+})
diff --git a/riscv/insns/vror_vv.h b/riscv/insns/vror_vv.h
new file mode 100644
index 0000000..c649c6d
--- /dev/null
+++ b/riscv/insns/vror_vv.h
@@ -0,0 +1,17 @@
+// vror.vv vd, vs2, vs1, vm
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+// 'mask' selects the low log2(vsew) bits of the shift amount,
+// to limit the maximum shift to "vsew - 1" bits.
+const reg_t mask = P.VU.vsew - 1;
+
+VI_VV_ULOOP
+({
+ // For .vv, the shift amount comes from the vs1 element.
+ const reg_t rshift = vs1 & mask;
+ const reg_t lshift = (-rshift) & mask;
+ vd = (vs2 << lshift) | (vs2 >> rshift);
+})
diff --git a/riscv/insns/vror_vx.h b/riscv/insns/vror_vx.h
new file mode 100644
index 0000000..50c8e5c
--- /dev/null
+++ b/riscv/insns/vror_vx.h
@@ -0,0 +1,18 @@
+// vror.vx vd, vs2, rs1, vm
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+// 'mask' selects the low log2(vsew) bits of the shift amount,
+// to limit the maximum shift to "vsew - 1" bits.
+const reg_t mask = P.VU.vsew - 1;
+
+// For .vx, the shift amount comes from rs1.
+const reg_t rshift = ((reg_t)RS1) & mask;
+const reg_t lshift = (-rshift) & mask;
+
+VI_V_ULOOP
+({
+ vd = (vs2 << lshift) | (vs2 >> rshift);
+})
diff --git a/riscv/insns/vwsll_vi.h b/riscv/insns/vwsll_vi.h
new file mode 100644
index 0000000..13b5eb4
--- /dev/null
+++ b/riscv/insns/vwsll_vi.h
@@ -0,0 +1,10 @@
+// vwsll.vi vd, vs2, zimm5, vm
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+VI_ZVK_VI_WIDENING_ULOOP({
+ const reg_t shift = zimm5 & ((2 * sew) - 1);
+ vd_w = vs2_w << shift;
+});
diff --git a/riscv/insns/vwsll_vv.h b/riscv/insns/vwsll_vv.h
new file mode 100644
index 0000000..5a64c6c
--- /dev/null
+++ b/riscv/insns/vwsll_vv.h
@@ -0,0 +1,10 @@
+// vwsll.vv vd, vs2, vs1, vm
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+VI_ZVK_VV_WIDENING_ULOOP({
+ const reg_t shift = (vs1 & ((2 * sew) - 1));
+ vd_w = vs2_w << shift;
+});
diff --git a/riscv/insns/vwsll_vx.h b/riscv/insns/vwsll_vx.h
new file mode 100644
index 0000000..5264e80
--- /dev/null
+++ b/riscv/insns/vwsll_vx.h
@@ -0,0 +1,10 @@
+// vwsll.vx vd, vs2, rs1, vm
+
+#include "zvk_ext_macros.h"
+
+require_zvbb;
+
+VI_ZVK_VX_WIDENING_ULOOP({
+ const reg_t shift = (rs1 & ((2 * sew) - 1));
+ vd_w = vs2_w << shift;
+});
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in
index 3b493a0..4aa23e3 100644
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@@ -1340,6 +1340,27 @@ riscv_insn_ext_zacas = \
amocas_d \
$(if $(HAVE_INT128),amocas_q)
+riscv_insn_ext_zvbb = \
+ vandn_vv \
+ vandn_vx \
+ vbrev8_v \
+ vbrev_v \
+ vclz_v \
+ vcpop_v \
+ vctz_v \
+ vrev8_v \
+ vrol_vv \
+ vrol_vx \
+ vror_vi \
+ vror_vv \
+ vror_vx \
+ vwsll_vi \
+ vwsll_vv \
+ vwsll_vx \
+
+riscv_insn_ext_zvk = \
+ $(riscv_insn_ext_zvbb) \
+
riscv_insn_list = \
$(if $(HAVE_INT128),$(riscv_insn_ext_v),) \
$(riscv_insn_ext_a) \
@@ -1363,6 +1384,7 @@ riscv_insn_list = \
$(riscv_insn_ext_zfh) \
$(riscv_insn_ext_zfh_zfa) \
$(riscv_insn_ext_zicond) \
+ $(riscv_insn_ext_zvk) \
$(riscv_insn_priv) \
$(riscv_insn_smrnmi) \
$(riscv_insn_svinval) \