diff options
-rw-r--r-- | riscv/decode.h | 1 | ||||
-rw-r--r-- | riscv/insns/vandn_vv.h | 10 | ||||
-rw-r--r-- | riscv/insns/vandn_vx.h | 10 | ||||
-rw-r--r-- | riscv/insns/vbrev8_v.h | 13 | ||||
-rw-r--r-- | riscv/insns/vbrev_v.h | 24 | ||||
-rw-r--r-- | riscv/insns/vclz_v.h | 16 | ||||
-rw-r--r-- | riscv/insns/vcpop_v.h | 16 | ||||
-rw-r--r-- | riscv/insns/vctz_v.h | 16 | ||||
-rw-r--r-- | riscv/insns/vrev8_v.h | 16 | ||||
-rw-r--r-- | riscv/insns/vrol_vv.h | 17 | ||||
-rw-r--r-- | riscv/insns/vrol_vx.h | 18 | ||||
-rw-r--r-- | riscv/insns/vror_vi.h | 18 | ||||
-rw-r--r-- | riscv/insns/vror_vv.h | 17 | ||||
-rw-r--r-- | riscv/insns/vror_vx.h | 18 | ||||
-rw-r--r-- | riscv/insns/vwsll_vi.h | 10 | ||||
-rw-r--r-- | riscv/insns/vwsll_vv.h | 10 | ||||
-rw-r--r-- | riscv/insns/vwsll_vx.h | 10 | ||||
-rw-r--r-- | riscv/riscv.mk.in | 22 |
18 files changed, 262 insertions, 0 deletions
diff --git a/riscv/decode.h b/riscv/decode.h index dad32a1..cd1c0a1 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -140,6 +140,7 @@ public: uint64_t v_vta() { return x(26, 1); } uint64_t v_vma() { return x(27, 1); } uint64_t v_mew() { return x(28, 1); } + uint64_t v_zimm6() { return x(15, 5) + (x(26, 1) << 5); } uint64_t p_imm2() { return x(20, 2); } uint64_t p_imm3() { return x(20, 3); } diff --git a/riscv/insns/vandn_vv.h b/riscv/insns/vandn_vv.h new file mode 100644 index 0000000..d85e47d --- /dev/null +++ b/riscv/insns/vandn_vv.h @@ -0,0 +1,10 @@ +// vandn.vv vd, vs2, vs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_VV_LOOP +({ + vd = vs2 & (~vs1); +}) diff --git a/riscv/insns/vandn_vx.h b/riscv/insns/vandn_vx.h new file mode 100644 index 0000000..1c66a40 --- /dev/null +++ b/riscv/insns/vandn_vx.h @@ -0,0 +1,10 @@ +// vandn.vx vd, vs2, rs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_VX_LOOP +({ + vd = vs2 & (~rs1); +}) diff --git a/riscv/insns/vbrev8_v.h b/riscv/insns/vbrev8_v.h new file mode 100644 index 0000000..a6d3cda --- /dev/null +++ b/riscv/insns/vbrev8_v.h @@ -0,0 +1,13 @@ +// vbrev8.v vd, vs2, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_V_ULOOP +({ + vd = vs2; + vd = ((vd & 0x5555555555555555llu) << 1) | ((vd & 0xAAAAAAAAAAAAAAAAllu) >> 1); + vd = ((vd & 0x3333333333333333llu) << 2) | ((vd & 0xCCCCCCCCCCCCCCCCllu) >> 2); + vd = ((vd & 0x0F0F0F0F0F0F0F0Fllu) << 4) | ((vd & 0xF0F0F0F0F0F0F0F0llu) >> 4); +}) diff --git a/riscv/insns/vbrev_v.h b/riscv/insns/vbrev_v.h new file mode 100644 index 0000000..7f784c2 --- /dev/null +++ b/riscv/insns/vbrev_v.h @@ -0,0 +1,24 @@ +// vbrev.v vd, vs2, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_V_ULOOP +({ + reg_t x = vs2; + + // Reverse bits in bytes (vbrev8) + x = ((x & 0x5555555555555555llu) << 1) | ((x & 0xAAAAAAAAAAAAAAAAllu) >> 1); + x = ((x & 0x3333333333333333llu) << 2) | ((x & 0xCCCCCCCCCCCCCCCCllu) >> 2); + x = ((x & 0x0F0F0F0F0F0F0F0Fllu) << 4) | ((x & 
0xF0F0F0F0F0F0F0F0llu) >> 4); + // Re-order bytes (vrev8) + if (P.VU.vsew > 8) + x = ((x & 0x00FF00FF00FF00FFllu) << 8) | ((x & 0xFF00FF00FF00FF00llu) >> 8); + if (P.VU.vsew > 16) + x = ((x & 0x0000FFFF0000FFFFllu) << 16) | ((x & 0xFFFF0000FFFF0000llu) >> 16); + if (P.VU.vsew > 32) + x = ((x & 0x00000000FFFFFFFFllu) << 32) | ((x & 0xFFFFFFFF00000000llu) >> 32); + + vd = x; +}) diff --git a/riscv/insns/vclz_v.h b/riscv/insns/vclz_v.h new file mode 100644 index 0000000..5f7f03c --- /dev/null +++ b/riscv/insns/vclz_v.h @@ -0,0 +1,16 @@ +// vclz.v vd, vs2, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_V_ULOOP +({ + unsigned int i = 0; + for (; i < P.VU.vsew; ++i) { + if (1 & (vs2 >> (P.VU.vsew - 1 - i))) { + break; + } + } + vd = i; +}) diff --git a/riscv/insns/vcpop_v.h b/riscv/insns/vcpop_v.h new file mode 100644 index 0000000..52b29c6 --- /dev/null +++ b/riscv/insns/vcpop_v.h @@ -0,0 +1,16 @@ +// vcpop.v vd, vs2, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_V_ULOOP +({ + reg_t count = 0; + for (std::size_t i = 0; i < P.VU.vsew; ++i) { + if (1 & (vs2 >> i)) { + count++; + } + } + vd = count; +}) diff --git a/riscv/insns/vctz_v.h b/riscv/insns/vctz_v.h new file mode 100644 index 0000000..b63dd01 --- /dev/null +++ b/riscv/insns/vctz_v.h @@ -0,0 +1,16 @@ +// vctz.v vd, vs2, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_V_ULOOP +({ + unsigned int i = 0; + for (; i < P.VU.vsew; ++i) { + if (1 & (vs2 >> i)) { + break; + } + } + vd = i; +}) diff --git a/riscv/insns/vrev8_v.h b/riscv/insns/vrev8_v.h new file mode 100644 index 0000000..f26c5a0 --- /dev/null +++ b/riscv/insns/vrev8_v.h @@ -0,0 +1,16 @@ +// vrev8.v vd, vs2, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_V_ULOOP +({ + vd = vs2; + if (P.VU.vsew > 8) + vd = ((vd & 0x00FF00FF00FF00FFllu) << 8) | ((vd & 0xFF00FF00FF00FF00llu) >> 8); + if (P.VU.vsew > 16) + vd = ((vd & 0x0000FFFF0000FFFFllu) << 16) | ((vd & 0xFFFF0000FFFF0000llu) >> 16); + if (P.VU.vsew > 32) + vd = ((vd & 
0x00000000FFFFFFFFllu) << 32) | ((vd & 0xFFFFFFFF00000000llu) >> 32); +}) diff --git a/riscv/insns/vrol_vv.h b/riscv/insns/vrol_vv.h new file mode 100644 index 0000000..fb2e483 --- /dev/null +++ b/riscv/insns/vrol_vv.h @@ -0,0 +1,17 @@ +// vrol.vv vd, vs2, vs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +// 'mask' selects the low log2(vsew) bits of the shift amount, +// to limit the maximum shift to "vsew - 1" bits. +const reg_t mask = P.VU.vsew - 1; + +VI_VV_ULOOP +({ + // For .vv, the shift amount comes from the vs1 element. + const reg_t lshift = vs1 & mask; + const reg_t rshift = (-lshift) & mask; + vd = (vs2 << lshift) | (vs2 >> rshift); +}) diff --git a/riscv/insns/vrol_vx.h b/riscv/insns/vrol_vx.h new file mode 100644 index 0000000..b0c89a2 --- /dev/null +++ b/riscv/insns/vrol_vx.h @@ -0,0 +1,18 @@ +// vrol.vx vd, vs2, rs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +// 'mask' selects the low log2(vsew) bits of the shift amount, +// to limit the maximum shift to "vsew - 1" bits. +const reg_t mask = P.VU.vsew - 1; + +// For .vx, the shift amount comes from rs1. +const reg_t lshift = ((reg_t)RS1) & mask; +const reg_t rshift = (-lshift) & mask; + +VI_V_ULOOP +({ + vd = (vs2 << lshift) | (vs2 >> rshift); +}) diff --git a/riscv/insns/vror_vi.h b/riscv/insns/vror_vi.h new file mode 100644 index 0000000..1269c3d --- /dev/null +++ b/riscv/insns/vror_vi.h @@ -0,0 +1,18 @@ +// vror.vi vd, vs2, zimm6, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +// 'mask' selects the low log2(vsew) bits of the shift amount, +// to limit the maximum shift to "vsew - 1" bits. +const reg_t mask = P.VU.vsew - 1; + +// For .vi, the shift amount comes from bits [26,19-15]. 
+const reg_t rshift = insn.v_zimm6() & mask; +const reg_t lshift = (-rshift) & mask; + +VI_V_ULOOP +({ + vd = (vs2 << lshift) | (vs2 >> rshift); +}) diff --git a/riscv/insns/vror_vv.h b/riscv/insns/vror_vv.h new file mode 100644 index 0000000..c649c6d --- /dev/null +++ b/riscv/insns/vror_vv.h @@ -0,0 +1,17 @@ +// vror.vv vd, vs2, vs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +// 'mask' selects the low log2(vsew) bits of the shift amount, +// to limit the maximum shift to "vsew - 1" bits. +const reg_t mask = P.VU.vsew - 1; + +VI_VV_ULOOP +({ + // For .vv, the shift amount comes from the vs1 element. + const reg_t rshift = vs1 & mask; + const reg_t lshift = (-rshift) & mask; + vd = (vs2 << lshift) | (vs2 >> rshift); +}) diff --git a/riscv/insns/vror_vx.h b/riscv/insns/vror_vx.h new file mode 100644 index 0000000..50c8e5c --- /dev/null +++ b/riscv/insns/vror_vx.h @@ -0,0 +1,18 @@ +// vror.vx vd, vs2, rs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +// 'mask' selects the low log2(vsew) bits of the shift amount, +// to limit the maximum shift to "vsew - 1" bits. +const reg_t mask = P.VU.vsew - 1; + +// For .vx, the shift amount comes from rs1. 
+const reg_t rshift = ((reg_t)RS1) & mask; +const reg_t lshift = (-rshift) & mask; + +VI_V_ULOOP +({ + vd = (vs2 << lshift) | (vs2 >> rshift); +}) diff --git a/riscv/insns/vwsll_vi.h b/riscv/insns/vwsll_vi.h new file mode 100644 index 0000000..13b5eb4 --- /dev/null +++ b/riscv/insns/vwsll_vi.h @@ -0,0 +1,10 @@ +// vwsll.vi vd, vs2, zimm5, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_ZVK_VI_WIDENING_ULOOP({ + const reg_t shift = zimm5 & ((2 * sew) - 1); + vd_w = vs2_w << shift; +}); diff --git a/riscv/insns/vwsll_vv.h b/riscv/insns/vwsll_vv.h new file mode 100644 index 0000000..5a64c6c --- /dev/null +++ b/riscv/insns/vwsll_vv.h @@ -0,0 +1,10 @@ +// vwsll.vv vd, vs2, vs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_ZVK_VV_WIDENING_ULOOP({ + const reg_t shift = (vs1 & ((2 * sew) - 1)); + vd_w = vs2_w << shift; +}); diff --git a/riscv/insns/vwsll_vx.h b/riscv/insns/vwsll_vx.h new file mode 100644 index 0000000..5264e80 --- /dev/null +++ b/riscv/insns/vwsll_vx.h @@ -0,0 +1,10 @@ +// vwsll.vx vd, vs2, rs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_ZVK_VX_WIDENING_ULOOP({ + const reg_t shift = (rs1 & ((2 * sew) - 1)); + vd_w = vs2_w << shift; +}); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 3b493a0..4aa23e3 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1340,6 +1340,27 @@ riscv_insn_ext_zacas = \ amocas_d \ $(if $(HAVE_INT128),amocas_q) +riscv_insn_ext_zvbb = \ + vandn_vv \ + vandn_vx \ + vbrev8_v \ + vbrev_v \ + vclz_v \ + vcpop_v \ + vctz_v \ + vrev8_v \ + vrol_vv \ + vrol_vx \ + vror_vi \ + vror_vv \ + vror_vx \ + vwsll_vi \ + vwsll_vv \ + vwsll_vx \ + +riscv_insn_ext_zvk = \ + $(riscv_insn_ext_zvbb) \ + riscv_insn_list = \ $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ $(riscv_insn_ext_a) \ @@ -1363,6 +1384,7 @@ riscv_insn_list = \ $(riscv_insn_ext_zfh) \ $(riscv_insn_ext_zfh_zfa) \ $(riscv_insn_ext_zicond) \ + $(riscv_insn_ext_zvk) \ $(riscv_insn_priv) \ $(riscv_insn_smrnmi) \ $(riscv_insn_svinval) \ |