From 655aedc0ebd2326d69d389bc714c2d622bf2cb08 Mon Sep 17 00:00:00 2001
From: Chih-Min Chao
Date: Thu, 6 Jun 2019 03:24:27 -0700
Subject: rvv: add integer/fixed-point/mask/reduction/permutation instructions

based on v-spec 0.7.1, supported sections: 12 / 13 / 15.1 ~ 15.2 / 16 / 17
element size: 8/16/32/64
support ediv: 1

Signed-off-by: Bruce Hoult
Signed-off-by: Chih-Min Chao
Signed-off-by: Dave Wen
---
 riscv/decode.h | 1041 ++++++++++++++++++++++++++++++++++++++++++
 riscv/insns/vaadd_vi.h | 9 +
 riscv/insns/vaadd_vv.h | 2 +
 riscv/insns/vaadd_vx.h | 2 +
 riscv/insns/vadc_vim.h | 11 +
 riscv/insns/vadc_vvm.h | 11 +
 riscv/insns/vadc_vxm.h | 11 +
 riscv/insns/vadd_vi.h | 5 +
 riscv/insns/vadd_vv.h | 5 +
 riscv/insns/vadd_vx.h | 5 +
 riscv/insns/vand_vi.h | 5 +
 riscv/insns/vand_vv.h | 5 +
 riscv/insns/vand_vx.h | 5 +
 riscv/insns/vasub_vv.h | 2 +
 riscv/insns/vasub_vx.h | 2 +
 riscv/insns/vcompress_vm.h | 41 ++
 riscv/insns/vdiv_vv.h | 10 +
 riscv/insns/vdiv_vx.h | 10 +
 riscv/insns/vdivu_vv.h | 8 +
 riscv/insns/vdivu_vx.h | 8 +
 riscv/insns/vdot_vv.h | 5 +
 riscv/insns/vdotu_vv.h | 5 +
 riscv/insns/vext_x_v.h | 30 ++
 riscv/insns/vid_v.h | 30 ++
 riscv/insns/viota_m.h | 52 +++
 riscv/insns/vmacc_vv.h | 5 +
 riscv/insns/vmacc_vx.h | 5 +
 riscv/insns/vmadc_vim.h | 14 +
 riscv/insns/vmadc_vvm.h | 14 +
 riscv/insns/vmadc_vxm.h | 14 +
 riscv/insns/vmadd_vv.h | 5 +
 riscv/insns/vmadd_vx.h | 5 +
 riscv/insns/vmand_mm.h | 2 +
 riscv/insns/vmandnot_mm.h | 2 +
 riscv/insns/vmax_vv.h | 10 +
 riscv/insns/vmax_vx.h | 10 +
 riscv/insns/vmaxu_vv.h | 9 +
 riscv/insns/vmaxu_vx.h | 9 +
 riscv/insns/vmerge_vim.h | 9 +
 riscv/insns/vmerge_vvm.h | 9 +
 riscv/insns/vmerge_vxm.h | 9 +
 riscv/insns/vmfeq_vf.h | 5 +
 riscv/insns/vmfeq_vv.h | 5 +
 riscv/insns/vmfge_vf.h | 5 +
 riscv/insns/vmfgt_vf.h | 5 +
 riscv/insns/vmfirst_m.h | 20 +
 riscv/insns/vmfle_vf.h | 5 +
 riscv/insns/vmfle_vv.h | 5 +
 riscv/insns/vmflt_vf.h | 5 +
 riscv/insns/vmflt_vv.h | 5 +
 riscv/insns/vmfne_vf.h | 5 +
 riscv/insns/vmfne_vv.h | 5 +
 riscv/insns/vmford_vf.h | 5 +
 riscv/insns/vmford_vv.h | 5 +
 riscv/insns/vmin_vv.h | 11 +
 riscv/insns/vmin_vx.h | 11 +
 riscv/insns/vminu_vv.h | 9 +
 riscv/insns/vminu_vx.h | 10 +
 riscv/insns/vmnand_mm.h | 2 +
 riscv/insns/vmnor_mm.h | 2 +
 riscv/insns/vmor_mm.h | 2 +
 riscv/insns/vmornot_mm.h | 2 +
 riscv/insns/vmpopc_m.h | 24 +
 riscv/insns/vmsbc_vvm.h | 14 +
 riscv/insns/vmsbc_vxm.h | 14 +
 riscv/insns/vmsbf_m.h | 34 ++
 riscv/insns/vmseq_vi.h | 5 +
 riscv/insns/vmseq_vv.h | 6 +
 riscv/insns/vmseq_vx.h | 5 +
 riscv/insns/vmsgt_vi.h | 5 +
 riscv/insns/vmsgt_vx.h | 5 +
 riscv/insns/vmsgtu_vi.h | 5 +
 riscv/insns/vmsgtu_vx.h | 5 +
 riscv/insns/vmsif_m.h | 34 ++
 riscv/insns/vmsle_vi.h | 5 +
 riscv/insns/vmsle_vv.h | 5 +
 riscv/insns/vmsle_vx.h | 5 +
 riscv/insns/vmsleu_vi.h | 5 +
 riscv/insns/vmsleu_vv.h | 5 +
 riscv/insns/vmsleu_vx.h | 5 +
 riscv/insns/vmslt_vv.h | 5 +
 riscv/insns/vmslt_vx.h | 5 +
 riscv/insns/vmsltu_vv.h | 5 +
 riscv/insns/vmsltu_vx.h | 5 +
 riscv/insns/vmsne_vi.h | 5 +
 riscv/insns/vmsne_vv.h | 5 +
 riscv/insns/vmsne_vx.h | 5 +
 riscv/insns/vmsof_m.h | 32 ++
 riscv/insns/vmul_vv.h | 5 +
 riscv/insns/vmul_vx.h | 5 +
 riscv/insns/vmulh_vv.h | 5 +
 riscv/insns/vmulh_vx.h | 5 +
 riscv/insns/vmulhsu_vv.h | 37 ++
 riscv/insns/vmulhsu_vx.h | 37 ++
 riscv/insns/vmulhu_vv.h | 5 +
 riscv/insns/vmulhu_vx.h | 5 +
 riscv/insns/vmv_s_x.h | 45 ++
 riscv/insns/vmv_v_i.h | 5 +
 riscv/insns/vmv_v_v.h | 5 +
 riscv/insns/vmv_v_x.h | 5 +
 riscv/insns/vmxnor_mm.h | 2 +
 riscv/insns/vmxor_mm.h | 2 +
 riscv/insns/vnclip_vi.h | 24 +
 riscv/insns/vnclip_vv.h | 30 ++
 riscv/insns/vnclip_vx.h | 29 ++
 riscv/insns/vnclipu_vi.h | 20 +
 riscv/insns/vnclipu_vv.h | 26 ++
 riscv/insns/vnclipu_vx.h | 26 ++
 riscv/insns/vnmsac_vv.h | 5 +
 riscv/insns/vnmsac_vx.h | 5 +
 riscv/insns/vnmsub_vv.h | 5 +
 riscv/insns/vnmsub_vx.h | 5 +
 riscv/insns/vnsra_vi.h | 5 +
 riscv/insns/vnsra_vv.h | 5 +
 riscv/insns/vnsra_vx.h | 5 +
 riscv/insns/vnsrl_vi.h | 5 +
 riscv/insns/vnsrl_vv.h | 5 +
 riscv/insns/vnsrl_vx.h | 5 +
 riscv/insns/vor_vi.h | 5 +
 riscv/insns/vor_vv.h | 5 +
 riscv/insns/vor_vx.h | 5 +
 riscv/insns/vredand_vs.h | 5 +
 riscv/insns/vredmax_vs.h | 5 +
 riscv/insns/vredmaxu_vs.h | 5 +
 riscv/insns/vredmin_vs.h | 5 +
 riscv/insns/vredminu_vs.h | 5 +
 riscv/insns/vredor_vs.h | 5 +
 riscv/insns/vredsum_vs.h | 5 +
 riscv/insns/vredxor_vs.h | 5 +
 riscv/insns/vrem_vv.h | 11 +
 riscv/insns/vrem_vx.h | 10 +
 riscv/insns/vremu_vv.h | 8 +
 riscv/insns/vremu_vx.h | 8 +
 riscv/insns/vrgather_vi.h | 29 ++
 riscv/insns/vrgather_vv.h | 39 ++
 riscv/insns/vrgather_vx.h | 30 ++
 riscv/insns/vrsub_vi.h | 5 +
 riscv/insns/vrsub_vx.h | 5 +
 riscv/insns/vsadd_vi.h | 27 ++
 riscv/insns/vsadd_vv.h | 28 ++
 riscv/insns/vsadd_vx.h | 27 ++
 riscv/insns/vsaddu_vi.h | 11 +
 riscv/insns/vsaddu_vv.h | 11 +
 riscv/insns/vsaddu_vx.h | 12 +
 riscv/insns/vsbc_vvm.h | 11 +
 riscv/insns/vsbc_vxm.h | 11 +
 riscv/insns/vslide1down_vx.h | 42 ++
 riscv/insns/vslide1up_vx.h | 32 ++
 riscv/insns/vslidedown_vi.h | 33 ++
 riscv/insns/vslidedown_vx.h | 33 ++
 riscv/insns/vslideup_vi.h | 33 ++
 riscv/insns/vslideup_vx.h | 29 ++
 riscv/insns/vsll_vi.h | 5 +
 riscv/insns/vsll_vv.h | 5 +
 riscv/insns/vsll_vx.h | 5 +
 riscv/insns/vsmul_vv.h | 33 ++
 riscv/insns/vsmul_vx.h | 34 ++
 riscv/insns/vsra_vi.h | 5 +
 riscv/insns/vsra_vv.h | 5 +
 riscv/insns/vsra_vx.h | 5 +
 riscv/insns/vsrl_vi.h | 5 +
 riscv/insns/vsrl_vv.h | 5 +
 riscv/insns/vsrl_vx.h | 5 +
 riscv/insns/vssra_vi.h | 8 +
 riscv/insns/vssra_vv.h | 9 +
 riscv/insns/vssra_vx.h | 9 +
 riscv/insns/vssrl_vi.h | 9 +
 riscv/insns/vssrl_vv.h | 9 +
 riscv/insns/vssrl_vx.h | 9 +
 riscv/insns/vssub_vv.h | 28 ++
 riscv/insns/vssub_vx.h | 28 ++
 riscv/insns/vssubu_vv.h | 29 ++
 riscv/insns/vssubu_vx.h | 28 ++
 riscv/insns/vsub_vv.h | 5 +
 riscv/insns/vsub_vx.h | 5 +
 riscv/insns/vwadd_vv.h | 6 +
 riscv/insns/vwadd_vx.h | 6 +
 riscv/insns/vwadd_wv.h | 6 +
 riscv/insns/vwadd_wx.h | 6 +
 riscv/insns/vwaddu_vv.h | 6 +
 riscv/insns/vwaddu_vx.h | 6 +
 riscv/insns/vwaddu_wv.h | 6 +
 riscv/insns/vwaddu_wx.h | 6 +
 riscv/insns/vwmacc_vv.h | 6 +
 riscv/insns/vwmacc_vx.h | 6 +
 riscv/insns/vwmaccsu_vv.h | 6 +
 riscv/insns/vwmaccsu_vx.h | 6 +
 riscv/insns/vwmaccu_vv.h | 6 +
 riscv/insns/vwmaccu_vx.h | 6 +
 riscv/insns/vwmaccus_vx.h | 6 +
 riscv/insns/vwmul_vv.h | 6 +
 riscv/insns/vwmul_vx.h | 6 +
 riscv/insns/vwmulsu_vv.h | 16 +
 riscv/insns/vwmulsu_vx.h | 16 +
 riscv/insns/vwmulu_vv.h | 6 +
 riscv/insns/vwmulu_vx.h | 6 +
 riscv/insns/vwredsum_vs.h | 5 +
 riscv/insns/vwredsumu_vs.h | 5 +
 riscv/insns/vwsmacc_vv.h | 2 +
 riscv/insns/vwsmacc_vx.h | 2 +
 riscv/insns/vwsmaccsu_vv.h | 2 +
 riscv/insns/vwsmaccsu_vx.h | 2 +
 riscv/insns/vwsmaccu_vv.h | 2 +
 riscv/insns/vwsmaccu_vx.h | 2 +
 riscv/insns/vwsmaccus_vx.h | 2 +
 riscv/insns/vwsub_vv.h | 6 +
 riscv/insns/vwsub_vx.h | 6 +
 riscv/insns/vwsub_wv.h | 6 +
 riscv/insns/vwsub_wx.h | 6 +
 riscv/insns/vwsubu_vv.h | 6 +
 riscv/insns/vwsubu_vx.h | 6 +
 riscv/insns/vwsubu_wv.h | 6 +
 riscv/insns/vwsubu_wx.h | 6 +
 riscv/insns/vxor_vi.h | 5 +
 riscv/insns/vxor_vv.h | 5 +
 riscv/insns/vxor_vx.h | 5 +
 riscv/riscv.mk.in | 206 +++++++++
 217 files changed, 3473 insertions(+)
 create mode 100644 riscv/insns/vaadd_vi.h
 create mode 100644 riscv/insns/vaadd_vv.h
 create mode 100644
riscv/insns/vaadd_vx.h create mode 100644 riscv/insns/vadc_vim.h create mode 100644 riscv/insns/vadc_vvm.h create mode 100644 riscv/insns/vadc_vxm.h create mode 100644 riscv/insns/vadd_vi.h create mode 100644 riscv/insns/vadd_vv.h create mode 100644 riscv/insns/vadd_vx.h create mode 100644 riscv/insns/vand_vi.h create mode 100644 riscv/insns/vand_vv.h create mode 100644 riscv/insns/vand_vx.h create mode 100644 riscv/insns/vasub_vv.h create mode 100644 riscv/insns/vasub_vx.h create mode 100644 riscv/insns/vcompress_vm.h create mode 100644 riscv/insns/vdiv_vv.h create mode 100644 riscv/insns/vdiv_vx.h create mode 100644 riscv/insns/vdivu_vv.h create mode 100644 riscv/insns/vdivu_vx.h create mode 100644 riscv/insns/vdot_vv.h create mode 100644 riscv/insns/vdotu_vv.h create mode 100644 riscv/insns/vext_x_v.h create mode 100644 riscv/insns/vid_v.h create mode 100644 riscv/insns/viota_m.h create mode 100644 riscv/insns/vmacc_vv.h create mode 100644 riscv/insns/vmacc_vx.h create mode 100644 riscv/insns/vmadc_vim.h create mode 100644 riscv/insns/vmadc_vvm.h create mode 100644 riscv/insns/vmadc_vxm.h create mode 100644 riscv/insns/vmadd_vv.h create mode 100644 riscv/insns/vmadd_vx.h create mode 100644 riscv/insns/vmand_mm.h create mode 100644 riscv/insns/vmandnot_mm.h create mode 100644 riscv/insns/vmax_vv.h create mode 100644 riscv/insns/vmax_vx.h create mode 100644 riscv/insns/vmaxu_vv.h create mode 100644 riscv/insns/vmaxu_vx.h create mode 100644 riscv/insns/vmerge_vim.h create mode 100644 riscv/insns/vmerge_vvm.h create mode 100644 riscv/insns/vmerge_vxm.h create mode 100644 riscv/insns/vmfeq_vf.h create mode 100644 riscv/insns/vmfeq_vv.h create mode 100644 riscv/insns/vmfge_vf.h create mode 100644 riscv/insns/vmfgt_vf.h create mode 100644 riscv/insns/vmfirst_m.h create mode 100644 riscv/insns/vmfle_vf.h create mode 100644 riscv/insns/vmfle_vv.h create mode 100644 riscv/insns/vmflt_vf.h create mode 100644 riscv/insns/vmflt_vv.h create mode 100644 riscv/insns/vmfne_vf.h create mode 100644 riscv/insns/vmfne_vv.h create mode 100644 riscv/insns/vmford_vf.h create mode 100644 riscv/insns/vmford_vv.h create mode 100644 riscv/insns/vmin_vv.h create mode 100644 riscv/insns/vmin_vx.h create mode 100644 riscv/insns/vminu_vv.h create mode 100644 riscv/insns/vminu_vx.h create mode 100644 riscv/insns/vmnand_mm.h create mode 100644 riscv/insns/vmnor_mm.h create mode 100644 riscv/insns/vmor_mm.h create mode 100644 riscv/insns/vmornot_mm.h create mode 100644 riscv/insns/vmpopc_m.h create mode 100644 riscv/insns/vmsbc_vvm.h create mode 100644 riscv/insns/vmsbc_vxm.h create mode 100644 riscv/insns/vmsbf_m.h create mode 100644 riscv/insns/vmseq_vi.h create mode 100644 riscv/insns/vmseq_vv.h create mode 100644 riscv/insns/vmseq_vx.h create mode 100644 riscv/insns/vmsgt_vi.h create mode 100644 riscv/insns/vmsgt_vx.h create mode 100644 riscv/insns/vmsgtu_vi.h create mode 100644 riscv/insns/vmsgtu_vx.h create mode 100644 riscv/insns/vmsif_m.h create mode 100644 riscv/insns/vmsle_vi.h create mode 100644 riscv/insns/vmsle_vv.h create mode 100644 riscv/insns/vmsle_vx.h create mode 100644 riscv/insns/vmsleu_vi.h create mode 100644 riscv/insns/vmsleu_vv.h create mode 100644 riscv/insns/vmsleu_vx.h create mode 100644 riscv/insns/vmslt_vv.h create mode 100644 riscv/insns/vmslt_vx.h create mode 100644 riscv/insns/vmsltu_vv.h create mode 100644 riscv/insns/vmsltu_vx.h create mode 100644 riscv/insns/vmsne_vi.h create mode 100644 riscv/insns/vmsne_vv.h create mode 100644 riscv/insns/vmsne_vx.h create mode 100644 
riscv/insns/vmsof_m.h create mode 100644 riscv/insns/vmul_vv.h create mode 100644 riscv/insns/vmul_vx.h create mode 100644 riscv/insns/vmulh_vv.h create mode 100644 riscv/insns/vmulh_vx.h create mode 100644 riscv/insns/vmulhsu_vv.h create mode 100644 riscv/insns/vmulhsu_vx.h create mode 100644 riscv/insns/vmulhu_vv.h create mode 100644 riscv/insns/vmulhu_vx.h create mode 100644 riscv/insns/vmv_s_x.h create mode 100644 riscv/insns/vmv_v_i.h create mode 100644 riscv/insns/vmv_v_v.h create mode 100644 riscv/insns/vmv_v_x.h create mode 100644 riscv/insns/vmxnor_mm.h create mode 100644 riscv/insns/vmxor_mm.h create mode 100644 riscv/insns/vnclip_vi.h create mode 100644 riscv/insns/vnclip_vv.h create mode 100644 riscv/insns/vnclip_vx.h create mode 100644 riscv/insns/vnclipu_vi.h create mode 100644 riscv/insns/vnclipu_vv.h create mode 100644 riscv/insns/vnclipu_vx.h create mode 100644 riscv/insns/vnmsac_vv.h create mode 100644 riscv/insns/vnmsac_vx.h create mode 100644 riscv/insns/vnmsub_vv.h create mode 100644 riscv/insns/vnmsub_vx.h create mode 100644 riscv/insns/vnsra_vi.h create mode 100644 riscv/insns/vnsra_vv.h create mode 100644 riscv/insns/vnsra_vx.h create mode 100644 riscv/insns/vnsrl_vi.h create mode 100644 riscv/insns/vnsrl_vv.h create mode 100644 riscv/insns/vnsrl_vx.h create mode 100644 riscv/insns/vor_vi.h create mode 100644 riscv/insns/vor_vv.h create mode 100644 riscv/insns/vor_vx.h create mode 100644 riscv/insns/vredand_vs.h create mode 100644 riscv/insns/vredmax_vs.h create mode 100644 riscv/insns/vredmaxu_vs.h create mode 100644 riscv/insns/vredmin_vs.h create mode 100644 riscv/insns/vredminu_vs.h create mode 100644 riscv/insns/vredor_vs.h create mode 100644 riscv/insns/vredsum_vs.h create mode 100644 riscv/insns/vredxor_vs.h create mode 100644 riscv/insns/vrem_vv.h create mode 100644 riscv/insns/vrem_vx.h create mode 100644 riscv/insns/vremu_vv.h create mode 100644 riscv/insns/vremu_vx.h create mode 100644 riscv/insns/vrgather_vi.h create mode 100644 riscv/insns/vrgather_vv.h create mode 100644 riscv/insns/vrgather_vx.h create mode 100644 riscv/insns/vrsub_vi.h create mode 100644 riscv/insns/vrsub_vx.h create mode 100644 riscv/insns/vsadd_vi.h create mode 100644 riscv/insns/vsadd_vv.h create mode 100644 riscv/insns/vsadd_vx.h create mode 100644 riscv/insns/vsaddu_vi.h create mode 100644 riscv/insns/vsaddu_vv.h create mode 100644 riscv/insns/vsaddu_vx.h create mode 100644 riscv/insns/vsbc_vvm.h create mode 100644 riscv/insns/vsbc_vxm.h create mode 100644 riscv/insns/vslide1down_vx.h create mode 100644 riscv/insns/vslide1up_vx.h create mode 100644 riscv/insns/vslidedown_vi.h create mode 100644 riscv/insns/vslidedown_vx.h create mode 100644 riscv/insns/vslideup_vi.h create mode 100644 riscv/insns/vslideup_vx.h create mode 100644 riscv/insns/vsll_vi.h create mode 100644 riscv/insns/vsll_vv.h create mode 100644 riscv/insns/vsll_vx.h create mode 100644 riscv/insns/vsmul_vv.h create mode 100644 riscv/insns/vsmul_vx.h create mode 100644 riscv/insns/vsra_vi.h create mode 100644 riscv/insns/vsra_vv.h create mode 100644 riscv/insns/vsra_vx.h create mode 100644 riscv/insns/vsrl_vi.h create mode 100644 riscv/insns/vsrl_vv.h create mode 100644 riscv/insns/vsrl_vx.h create mode 100644 riscv/insns/vssra_vi.h create mode 100644 riscv/insns/vssra_vv.h create mode 100644 riscv/insns/vssra_vx.h create mode 100644 riscv/insns/vssrl_vi.h create mode 100644 riscv/insns/vssrl_vv.h create mode 100644 riscv/insns/vssrl_vx.h create mode 100644 riscv/insns/vssub_vv.h create mode 100644 
riscv/insns/vssub_vx.h create mode 100644 riscv/insns/vssubu_vv.h create mode 100644 riscv/insns/vssubu_vx.h create mode 100644 riscv/insns/vsub_vv.h create mode 100644 riscv/insns/vsub_vx.h create mode 100644 riscv/insns/vwadd_vv.h create mode 100644 riscv/insns/vwadd_vx.h create mode 100644 riscv/insns/vwadd_wv.h create mode 100644 riscv/insns/vwadd_wx.h create mode 100644 riscv/insns/vwaddu_vv.h create mode 100644 riscv/insns/vwaddu_vx.h create mode 100644 riscv/insns/vwaddu_wv.h create mode 100644 riscv/insns/vwaddu_wx.h create mode 100644 riscv/insns/vwmacc_vv.h create mode 100644 riscv/insns/vwmacc_vx.h create mode 100644 riscv/insns/vwmaccsu_vv.h create mode 100644 riscv/insns/vwmaccsu_vx.h create mode 100644 riscv/insns/vwmaccu_vv.h create mode 100644 riscv/insns/vwmaccu_vx.h create mode 100644 riscv/insns/vwmaccus_vx.h create mode 100644 riscv/insns/vwmul_vv.h create mode 100644 riscv/insns/vwmul_vx.h create mode 100644 riscv/insns/vwmulsu_vv.h create mode 100644 riscv/insns/vwmulsu_vx.h create mode 100644 riscv/insns/vwmulu_vv.h create mode 100644 riscv/insns/vwmulu_vx.h create mode 100644 riscv/insns/vwredsum_vs.h create mode 100644 riscv/insns/vwredsumu_vs.h create mode 100644 riscv/insns/vwsmacc_vv.h create mode 100644 riscv/insns/vwsmacc_vx.h create mode 100644 riscv/insns/vwsmaccsu_vv.h create mode 100644 riscv/insns/vwsmaccsu_vx.h create mode 100644 riscv/insns/vwsmaccu_vv.h create mode 100644 riscv/insns/vwsmaccu_vx.h create mode 100644 riscv/insns/vwsmaccus_vx.h create mode 100644 riscv/insns/vwsub_vv.h create mode 100644 riscv/insns/vwsub_vx.h create mode 100644 riscv/insns/vwsub_wv.h create mode 100644 riscv/insns/vwsub_wx.h create mode 100644 riscv/insns/vwsubu_vv.h create mode 100644 riscv/insns/vwsubu_vx.h create mode 100644 riscv/insns/vwsubu_wv.h create mode 100644 riscv/insns/vwsubu_wx.h create mode 100644 riscv/insns/vxor_vi.h create mode 100644 riscv/insns/vxor_vv.h create mode 100644 riscv/insns/vxor_vx.h diff --git a/riscv/decode.h b/riscv/decode.h index 6cbf934..ca6a999 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -11,6 +11,7 @@ # error spike requires a little-endian host #endif +#include #include #include #include @@ -23,6 +24,8 @@ typedef int64_t sreg_t; typedef uint64_t reg_t; +typedef __int128 int128_t; +typedef unsigned __int128 uint128_t; const int NXPR = 32; const int NFPR = 32; @@ -63,6 +66,12 @@ const int NCSR = 4096; #define MAX_INSN_LENGTH 8 #define PC_ALIGN 2 +#ifndef TAIL_ZEROING + #define TAIL_ZEROING true +#else + #define TAIL_ZEROING false +#endif + typedef uint64_t insn_bits_t; class insn_t { @@ -141,8 +150,10 @@ private: #define P (*p) #define READ_REG(reg) STATE.XPR[reg] #define READ_FREG(reg) STATE.FPR[reg] +#define RD READ_REG(insn.rd()) #define RS1 READ_REG(insn.rs1()) #define RS2 READ_REG(insn.rs2()) +#define RS3 READ_REG(insn.rs3()) #define WRITE_RD(value) WRITE_REG(insn.rd(), value) #ifndef RISCV_ENABLE_COMMITLOG @@ -288,6 +299,1036 @@ inline freg_t f128_negate(freg_t a) throw trap_illegal_instruction(0); \ (which); }) +/* For debug only. 
This will fail if the native machine's float types are not IEEE */ +inline float to_f(float32_t f){float r; memcpy(&r, &f, sizeof(r)); return r;} +inline double to_f(float64_t f){double r; memcpy(&r, &f, sizeof(r)); return r;} +inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r)); return r;} + +// Vector macros +#define e8 8 // 8b elements +#define e16 16 // 16b elements +#define e32 32 // 32b elements +#define e64 64 // 64b elements +#define e128 128 // 128b elements + +#define vsext(x, sew) (((sreg_t)(x) << (64-sew)) >> (64-sew)) +#define vzext(x, sew) (((reg_t)(x) << (64-sew)) >> (64-sew)) + +// +// vector: masking skip helper +// +#define VI_LOOP_ELEMENT_SKIP(BODY) \ + const int mlen = P.VU.vmlen; \ + const int midx = (mlen * i) / 64; \ + const int mpos = (mlen * i) % 64; \ + if (insn.v_vm() == 0) { \ + BODY; \ + bool skip = ((P.VU.elt(0, midx) >> mpos) & 0x1) == 0; \ + if (skip) \ + continue; \ + } + +#define VI_ELEMENT_SKIP(inx) \ + if (inx >= vl && TAIL_ZEROING) { \ + is_valid = false; \ + } else if (inx >= vl && !TAIL_ZEROING) { \ + continue; \ + } else if (inx < P.VU.vstart) { \ + continue; \ + } else { \ + VI_LOOP_ELEMENT_SKIP(); \ + } + +// +// vector: operation and register acccess check helper +// +static inline bool is_overlaped(const int astart, const int asize, + const int bstart, const int bsize) +{ + const int aend = astart + asize; + const int bend = bstart + bsize; + return std::max(aend, bend) - std::min(astart, bstart) < asize + bsize; +} + +#define VI_NARROW_CHECK_COMMON \ + require(P.VU.vlmul <= 4); \ + require(P.VU.vsew * 2 <= P.VU.ELEN); \ + require(insn.rs2() + P.VU.vlmul * 2 <= 32); + +#define VI_WIDE_CHECK_COMMON \ + require(!P.VU.vill);\ + require(P.VU.vlmul <= 4); \ + require(P.VU.vsew * 2 <= P.VU.ELEN); \ + require(insn.rd() + P.VU.vlmul * 2 <= 32); \ + if (insn.v_vm() == 0) \ + require(insn.rd() != 0); + +#define VI_CHECK_VREG_OVERLAP(v1, v2) \ + require(!is_overlaped(v1, P.VU.vlmul, v2, P.VU.vlmul)); + +#define VI_CHECK_SS \ + require(!is_overlaped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul)); + +#define VI_CHECK_SD \ + require(!is_overlaped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2)); + +#define VI_CHECK_DSS(is_rs) \ + VI_WIDE_CHECK_COMMON; \ + require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs2(), P.VU.vlmul)); \ + if (is_rs) \ + require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); + +#define VI_CHECK_DDS(is_rs) \ + VI_WIDE_CHECK_COMMON; \ + require(insn.rs2() + P.VU.vlmul * 2 <= 32); \ + if (is_rs) \ + require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); + +// +// vector: loop header and end helper +// +#define VI_GENERAL_LOOP_BASE \ + require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64); \ + require(!P.VU.vill);\ + reg_t vl = P.VU.vl; \ + reg_t sew = P.VU.vsew; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + for (reg_t i=P.VU.vstart; i(rd_num, vl * ((sew >> 3) * elm)); \ + memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 3) * elm)); \ + } + +#define VI_TAIL_ZERO_MASK(dst) \ + if (vl != 0 && TAIL_ZEROING){ \ + for (reg_t i=vl; i> (64 - mlen - mpos); \ + uint64_t &vdi = P.VU.elt(dst, midx); \ + vdi = (vdi & ~mmask);\ + }\ + }\ + +#define VI_LOOP_BASE \ + VI_GENERAL_LOOP_BASE \ + VI_LOOP_ELEMENT_SKIP(); + +#define VI_LOOP_END \ + } \ + if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \ + uint8_t *tail = &P.VU.elt(rd_num, vl * ((sew >> 3) * 1)); \ + memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 
3) * 1)); \ + }\ + P.VU.vstart = 0; + +#define VI_LOOP_END_NO_TAIL_ZERO \ + } \ + P.VU.vstart = 0; + +#define VI_LOOP_WIDEN_END \ + } \ + if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \ + uint8_t *tail = &P.VU.elt(rd_num, vl * ((sew >> 3) * 2)); \ + memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 3) * 2)); \ + }\ + P.VU.vstart = 0; + +#define VI_LOOP_REDUCTION_END(x) \ + } \ + if (vl > 0 && TAIL_ZEROING) { \ + vd_0_des = vd_0_res; \ + uint8_t *tail = (uint8_t *)&P.VU.elt::type>(rd_num, 1); \ + memset(tail, 0, (P.VU.get_vlen() - x) >> 3); \ + } \ + P.VU.vstart = 0; + +#define VI_LOOP_CMP_BASE \ + require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64); \ + require(!P.VU.vill);\ + reg_t vl = P.VU.vl; \ + reg_t sew = P.VU.vsew; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + for (reg_t i=P.VU.vstart; i> (64 - mlen - mpos); \ + uint64_t &vdi = P.VU.elt(insn.rd(), midx); \ + uint64_t res = 0; + +#define VI_LOOP_CMP_END \ + vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ + } \ + VI_TAIL_ZERO_MASK(rd_num); \ + P.VU.vstart = 0; + +#define VI_LOOP_MASK(op) \ + require(P.VU.vsew <= e64); \ + reg_t vl = P.VU.vl; \ + for (reg_t i = P.VU.vstart; i < vl; ++i) { \ + int mlen = P.VU.vmlen; \ + int midx = (mlen * i) / 64; \ + int mpos = (mlen * i) % 64; \ + uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \ + uint64_t vs2 = P.VU.elt(insn.rs2(), midx); \ + uint64_t vs1 = P.VU.elt(insn.rs1(), midx); \ + uint64_t &res = P.VU.elt(insn.rd(), midx); \ + res = (res & ~mmask) | ((op) & (1ULL << mpos)); \ + } \ + \ + if (TAIL_ZEROING) {\ + for (reg_t i = vl; i < P.VU.vlmax && i > 0; ++i) { \ + int mlen = P.VU.vmlen; \ + int midx = (mlen * i) / 64; \ + int mpos = (mlen * i) % 64; \ + uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \ + uint64_t &res = P.VU.elt(insn.rd(), midx); \ + res = (res & ~mmask); \ + } \ + } \ + P.VU.vstart = 0; + +#define VI_LOOP_NSHIFT_BASE \ + require(P.VU.vsew <= e32); \ + if (insn.rd() != 0){ \ + VI_CHECK_SD; \ + } \ + VI_GENERAL_LOOP_BASE; \ + VI_LOOP_ELEMENT_SKIP({\ + require(!(insn.rd() == 0 && P.VU.vlmul > 1));\ + }); + + +#define INT_ROUNDING(result, xrm, gb) \ + if (gb > 0) { \ + switch(xrm) {\ + case VRM::RNU:\ + result += ((uint64_t)1 << ((gb) - 1));\ + break;\ + case VRM::RNE:\ + if ((result & ((uint64_t)0x3 << ((gb) - 1))) == 0x1){\ + result -= ((uint64_t)1 << ((gb) - 1));\ + }else if ((result & ((uint64_t)0x3 << ((gb) - 1))) == 0x3){\ + result += ((uint64_t)1 << ((gb) - 1));\ + }\ + break;\ + case VRM::RDN:\ + result = (result >> ((gb) - 1)) << ((gb) - 1);\ + break;\ + case VRM::ROD:\ + result |= ((uint64_t)1ul << (gb)); \ + break;\ + case VRM::INVALID_RM:\ + assert(true);\ + } \ + } else if (gb == 0 && xrm == VRM::ROD) { \ + result |= 1ul; \ + } + + +// +// vector: integer and masking operand access helper +// +#define VXI_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i); \ + type_sew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); \ + type_sew_t::type rs1 = (type_sew_t::type)RS1; \ + type_sew_t::type simm5 = (type_sew_t::type)insn.v_simm5(); + +#define VV_U_PARAMS(x) \ + type_usew_t::type &vd = P.VU.elt::type>(rd_num, i); \ + type_usew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VX_U_PARAMS(x) \ + type_usew_t::type &vd = P.VU.elt::type>(rd_num, i); \ + type_usew_t::type rs1 = (type_usew_t::type)RS1; \ + type_usew_t::type vs2 
= P.VU.elt::type>(rs2_num, i); + +#define VI_U_PARAMS(x) \ + type_usew_t::type &vd = P.VU.elt::type>(rd_num, i); \ + type_usew_t::type simm5 = (type_usew_t::type)insn.v_zimm5(); \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VV_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i); \ + type_sew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VX_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i); \ + type_sew_t::type rs1 = (type_sew_t::type)RS1; \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VI_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i); \ + type_sew_t::type simm5 = (type_sew_t::type)insn.v_simm5(); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define XV_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i); \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, RS1); + +#define VI_XI_SLIDEDOWN_PARAMS(x, off) \ + auto &vd = P.VU.elt::type>(rd_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i + off); + +#define VI_XI_SLIDEUP_PARAMS(x, offset) \ + auto &vd = P.VU.elt::type>(rd_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i - offset); + +#define VI_NSHIFT_PARAMS(sew1, sew2) \ + auto &vd = P.VU.elt::type>(rd_num, i); \ + auto vs2_u = P.VU.elt::type>(rs2_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto zimm5 = (type_usew_t::type)insn.v_zimm5(); + +#define VX_NSHIFT_PARAMS(sew1, sew2) \ + auto &vd = P.VU.elt::type>(rd_num, i); \ + auto vs2_u = P.VU.elt::type>(rs2_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto rs1 = (type_sew_t::type)RS1; + +#define VV_NSHIFT_PARAMS(sew1, sew2) \ + auto &vd = P.VU.elt::type>(rd_num, i); \ + auto vs2_u = P.VU.elt::type>(rs2_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto vs1 = P.VU.elt::type>(rs1_num, i); + +#define XI_CARRY_PARAMS(x) \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto rs1 = (type_sew_t::type)RS1; \ + auto simm5 = (type_sew_t::type)insn.v_simm5(); \ + auto &vd = P.VU.elt(rd_num, midx); + +#define VV_CARRY_PARAMS(x) \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto vs1 = P.VU.elt::type>(rs1_num, i); \ + auto &vd = P.VU.elt(rd_num, midx); + +// +// vector: integer and masking operation loop +// + +// comparision result to masking register +#define VI_VV_LOOP_CMP(BODY) \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VX_LOOP_CMP(BODY) \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VX_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VI_LOOP_CMP(BODY) \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VI_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VV_ULOOP_CMP(BODY) \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VV_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define 
VI_VX_ULOOP_CMP(BODY) \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VX_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VI_ULOOP_CMP(BODY) \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VI_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +// merge and copy loop +#define VI_VVXI_MERGE_LOOP(BODY) \ + VI_GENERAL_LOOP_BASE \ + if (sew == e8){ \ + VXI_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VXI_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VXI_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VXI_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +// reduction loop - signed +#define VI_LOOP_REDUCTION_BASE(x) \ + require(x == e8 || x == e16 || x == e32 || x == e64); \ + require(!P.VU.vill);\ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt::type>(rd_num, 0); \ + auto vd_0_res = P.VU.elt::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i::type>(rs2_num, i); \ + +#define REDUCTION_LOOP(x, BODY) \ + VI_LOOP_REDUCTION_BASE(x) \ + BODY; \ + VI_LOOP_REDUCTION_END(x) + +#define VI_VV_LOOP_REDUCTION(BODY) \ + reg_t sew = P.VU.vsew; \ + if (sew == e8) { \ + REDUCTION_LOOP(e8, BODY) \ + } else if(sew == e16) { \ + REDUCTION_LOOP(e16, BODY) \ + } else if(sew == e32) { \ + REDUCTION_LOOP(e32, BODY) \ + } else if(sew == e64) { \ + REDUCTION_LOOP(e64, BODY) \ + } + +// reduction loop - unsgied +#define VI_ULOOP_REDUCTION_BASE(x) \ + require(x == e8 || x == e16 || x == e32 || x == e64); \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt::type>(rd_num, 0); \ + auto vd_0_res = P.VU.elt::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i::type>(rs2_num, i); + +#define REDUCTION_ULOOP(x, BODY) \ + VI_ULOOP_REDUCTION_BASE(x) \ + BODY; \ + VI_LOOP_REDUCTION_END(x) + +#define VI_VV_ULOOP_REDUCTION(BODY) \ + reg_t sew = P.VU.vsew; \ + if (sew == e8){ \ + REDUCTION_ULOOP(e8, BODY) \ + } else if(sew == e16) { \ + REDUCTION_ULOOP(e16, BODY) \ + } else if(sew == e32) { \ + REDUCTION_ULOOP(e32, BODY) \ + } else if(sew == e64) { \ + REDUCTION_ULOOP(e64, BODY) \ + } + +// genearl VXI signed/unsgied loop +#define VI_VV_ULOOP(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VV_LOOP(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_ULOOP(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_LOOP(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_PARAMS(e8); \ + BODY; \ + }else 
if(sew == e16){ \ + VX_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VI_ULOOP(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VI_LOOP(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +// narrow operation loop +#define VI_VV_LOOP_NARROW(BODY) \ +VI_NARROW_CHECK_COMMON; \ +VI_LOOP_BASE \ +if (sew == e8){ \ + VI_NARROW_SHIFT(e8, e16) \ + BODY; \ +}else if(sew == e16){ \ + VI_NARROW_SHIFT(e16, e32) \ + BODY; \ +}else if(sew == e32){ \ + VI_NARROW_SHIFT(e32, e64) \ + BODY; \ +} \ +VI_LOOP_END + +#define VI_NARROW_SHIFT(sew1, sew2) \ + type_usew_t::type &vd = P.VU.elt::type>(rd_num, i); \ + type_usew_t::type vs2_u = P.VU.elt::type>(rs2_num, i); \ + type_usew_t::type zimm5 = (type_usew_t::type)insn.v_zimm5(); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); \ + type_sew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_sew_t::type rs1 = (type_sew_t::type)RS1; + +#define VI_VVXI_LOOP_NARROW(BODY) \ + require(P.VU.vsew <= e32); \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_NARROW_SHIFT(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VI_NARROW_SHIFT(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VI_NARROW_SHIFT(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VI_LOOP_NSHIFT(BODY) \ + VI_LOOP_NSHIFT_BASE \ + if (sew == e8){ \ + VI_NSHIFT_PARAMS(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VI_NSHIFT_PARAMS(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VI_NSHIFT_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_LOOP_NSHIFT(BODY) \ + VI_LOOP_NSHIFT_BASE \ + if (sew == e8){ \ + VX_NSHIFT_PARAMS(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VX_NSHIFT_PARAMS(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VX_NSHIFT_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VV_LOOP_NSHIFT(BODY) \ + VI_LOOP_NSHIFT_BASE \ + if (sew == e8){ \ + VV_NSHIFT_PARAMS(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VV_NSHIFT_PARAMS(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VV_NSHIFT_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +// widen operation loop +#define VI_VV_LOOP_WIDEN(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_WIDEN_END + +#define VI_VX_LOOP_WIDEN(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_WIDEN_END + +#define VI_WIDE_OP_AND_ASSIGN(var0, var1, var2, op0, op1, sign) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign##16_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i) = \ + op1((sign##16_t)(sign##8_t)var0 op0 (sign##16_t)(sign##8_t)var1) + var2; \ + } \ + break; \ + case e16: { \ + sign##32_t vd_w = P.VU.elt(rd_num, i); \ + 
P.VU.elt(rd_num, i) = \ + op1((sign##32_t)(sign##16_t)var0 op0 (sign##32_t)(sign##16_t)var1) + var2; \ + } \ + break; \ + default: { \ + sign##64_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i) = \ + op1((sign##64_t)(sign##32_t)var0 op0 (sign##64_t)(sign##32_t)var1) + var2; \ + } \ + break; \ + } + +#define VI_WIDE_OP_AND_ASSIGN_MIX(var0, var1, var2, op0, op1, sign_d, sign_1, sign_2) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign_d##16_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i) = \ + op1((sign_1##16_t)(sign_1##8_t)var0 op0 (sign_2##16_t)(sign_2##8_t)var1) + var2; \ + } \ + break; \ + case e16: { \ + sign_d##32_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i) = \ + op1((sign_1##32_t)(sign_1##16_t)var0 op0 (sign_2##32_t)(sign_2##16_t)var1) + var2; \ + } \ + break; \ + default: { \ + sign_d##64_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i) = \ + op1((sign_1##64_t)(sign_1##32_t)var0 op0 (sign_2##64_t)(sign_2##32_t)var1) + var2; \ + } \ + break; \ + } + +#define VI_WIDE_WVX_OP(var0, op0, sign) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign##16_t &vd_w = P.VU.elt(rd_num, i); \ + sign##16_t vs2_w = P.VU.elt(rs2_num, i); \ + vd_w = vs2_w op0 (sign##16_t)(sign##8_t)var0; \ + } \ + break; \ + case e16: { \ + sign##32_t &vd_w = P.VU.elt(rd_num, i); \ + sign##32_t vs2_w = P.VU.elt(rs2_num, i); \ + vd_w = vs2_w op0 (sign##32_t)(sign##16_t)var0; \ + } \ + break; \ + default: { \ + sign##64_t &vd_w = P.VU.elt(rd_num, i); \ + sign##64_t vs2_w = P.VU.elt(rs2_num, i); \ + vd_w = vs2_w op0 (sign##64_t)(sign##32_t)var0; \ + } \ + break; \ + } + +#define VI_WIDE_SSMA(sew1, sew2, opd) \ + auto &vd = P.VU.elt::type>(rd_num, i); \ + auto vs1 = P.VU.elt::type>(rs1_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto rs1 = (type_sew_t::type)RS1; \ + int##sew2##_t res; \ + bool sat = false; \ + const int gb = sew1 / 2; \ + VRM vrm = P.VU.get_vround_mode(); \ + res = (int##sew2##_t)vs2 * (int##sew2##_t)opd; \ + INT_ROUNDING(res, vrm, gb); \ + res = res >> gb; \ + vd = sat_add(vd, res, sat); \ + P.VU.vxsat |= sat; + +#define VI_VVX_LOOP_WIDE_SSMA(opd) \ + VI_WIDE_CHECK_COMMON \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_WIDE_SSMA(8, 16, opd); \ + } else if(sew == e16){ \ + VI_WIDE_SSMA(16, 32, opd); \ + } else if(sew == e32){ \ + VI_WIDE_SSMA(32, 64, opd); \ + } \ + VI_LOOP_WIDEN_END + +#define VI_WIDE_USSMA(sew1, sew2, opd) \ + auto &vd = P.VU.elt::type>(rd_num, i); \ + auto vs1 = P.VU.elt::type>(rs1_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto rs1 = (type_usew_t::type)RS1; \ + uint##sew2##_t res; \ + bool sat = false; \ + const int gb = sew1 / 2; \ + VRM vrm = P.VU.get_vround_mode(); \ + res = (uint##sew2##_t)vs2 * (uint##sew2##_t)opd; \ + INT_ROUNDING(res, vrm, gb); \ + \ + res = res >> gb; \ + vd = sat_addu(vd, res, sat); \ + P.VU.vxsat |= sat; + +#define VI_VVX_LOOP_WIDE_USSMA(opd) \ + VI_WIDE_CHECK_COMMON \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_WIDE_USSMA(8, 16, opd); \ + } else if(sew == e16){ \ + VI_WIDE_USSMA(16, 32, opd); \ + } else if(sew == e32){ \ + VI_WIDE_USSMA(32, 64, opd); \ + } \ + VI_LOOP_WIDEN_END + +#define VI_WIDE_SU_SSMA(sew1, sew2, opd) \ + auto &vd = P.VU.elt::type>(rd_num, i); \ + auto vs1 = P.VU.elt::type>(rs1_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto rs1 = (type_sew_t::type)RS1; \ + int##sew2##_t res; \ + bool sat = false; \ + const int gb = sew1 / 2; \ + VRM vrm = P.VU.get_vround_mode(); \ + res = (uint##sew2##_t)vs2 * (int##sew2##_t)opd; \ + INT_ROUNDING(res, vrm, gb); \ + \ + res = res >> gb; \ + vd = 
sat_sub(vd, res, sat); \ + P.VU.vxsat |= sat; + +#define VI_VVX_LOOP_WIDE_SU_SSMA(opd) \ + VI_WIDE_CHECK_COMMON \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_WIDE_SU_SSMA(8, 16, opd); \ + } else if(sew == e16){ \ + VI_WIDE_SU_SSMA(16, 32, opd); \ + } else if(sew == e32){ \ + VI_WIDE_SU_SSMA(32, 64, opd); \ + } \ + VI_LOOP_WIDEN_END + +#define VI_WIDE_US_SSMA(sew1, sew2, opd) \ + auto &vd = P.VU.elt::type>(rd_num, i); \ + auto vs1 = P.VU.elt::type>(rs1_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto rs1 = (type_usew_t::type)RS1; \ + int##sew2##_t res; \ + bool sat = false; \ + const int gb = sew1 / 2; \ + VRM vrm = P.VU.get_vround_mode(); \ + res = (int##sew2##_t)vs2 * (uint##sew2##_t)opd; \ + INT_ROUNDING(res, vrm, gb); \ + \ + res = res >> gb; \ + vd = sat_sub(vd, res, sat); \ + P.VU.vxsat |= sat; + +#define VI_VVX_LOOP_WIDE_US_SSMA(opd) \ + VI_WIDE_CHECK_COMMON \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_WIDE_US_SSMA(8, 16, opd); \ + } else if(sew == e16){ \ + VI_WIDE_US_SSMA(16, 32, opd); \ + } else if(sew == e32){ \ + VI_WIDE_US_SSMA(32, 64, opd); \ + } \ + VI_LOOP_WIDEN_END + +// wide reduction loop - signed +#define VI_LOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + VI_CHECK_DSS(false); \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt::type>(rd_num, 0); \ + auto vd_0_res = P.VU.elt::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i::type>(rs2_num, i); + +#define WIDE_REDUCTION_LOOP(sew1, sew2, BODY) \ + VI_LOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + BODY; \ + VI_LOOP_REDUCTION_END(sew2) + +#define VI_VV_LOOP_WIDE_REDUCTION(BODY) \ + require(!P.VU.vill);\ + reg_t sew = P.VU.vsew; \ + if (sew == e8){ \ + WIDE_REDUCTION_LOOP(e8, e16, BODY) \ + } else if(sew == e16){ \ + WIDE_REDUCTION_LOOP(e16, e32, BODY) \ + } else if(sew == e32){ \ + WIDE_REDUCTION_LOOP(e32, e64, BODY) \ + } + +// wide reduction loop - unsigned +#define VI_ULOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + VI_CHECK_DSS(false); \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt::type>(rd_num, 0); \ + auto vd_0_res = P.VU.elt::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i::type>(rs2_num, i); + +#define WIDE_REDUCTION_ULOOP(sew1, sew2, BODY) \ + VI_ULOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + BODY; \ + VI_LOOP_REDUCTION_END(sew2) + +#define VI_VV_ULOOP_WIDE_REDUCTION(BODY) \ + require(!P.VU.vill);\ + reg_t sew = P.VU.vsew; \ + if (sew == e8){ \ + WIDE_REDUCTION_ULOOP(e8, e16, BODY) \ + } else if(sew == e16){ \ + WIDE_REDUCTION_ULOOP(e16, e32, BODY) \ + } else if(sew == e32){ \ + WIDE_REDUCTION_ULOOP(e32, e64, BODY) \ + } + +// carry/borrow bit loop +#define VI_VV_LOOP_CARRY(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_CARRY_PARAMS(e8) \ + BODY; \ + } else if (sew == e16) { \ + VV_CARRY_PARAMS(e16) \ + BODY; \ + } else if (sew == e32) { \ + VV_CARRY_PARAMS(e32) \ + BODY; \ + } else if (sew == e64) { \ + VV_CARRY_PARAMS(e64) \ + BODY; \ + } \ + } \ + VI_TAIL_ZERO_MASK(rd_num); + +#define VI_XI_LOOP_CARRY(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + XI_CARRY_PARAMS(e8) \ + BODY; \ + } else if (sew == e16) { \ + XI_CARRY_PARAMS(e16) \ + BODY; \ + } else if (sew == e32) { \ + XI_CARRY_PARAMS(e32) \ + BODY; \ + } else if (sew == e64) { \ + XI_CARRY_PARAMS(e64) \ + BODY; \ + } \ + } \ + VI_TAIL_ZERO_MASK(rd_num); + +// average loop +#define VI_VVX_LOOP_AVG(opd, op) \ +VRM xrm = p->VU.get_vround_mode(); \ +VI_LOOP_BASE \ + 
switch(sew) { \ + case e8: { \ + VV_PARAMS(e8); \ + type_sew_t::type rs1 = RS1; \ + auto res = (int32_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + case e16: { \ + VV_PARAMS(e16); \ + type_sew_t::type rs1 = RS1; \ + auto res = (int32_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + case e32: { \ + VV_PARAMS(e32); \ + type_sew_t::type rs1 = RS1; \ + auto res = (int64_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + default: { \ + VV_PARAMS(e64); \ + type_sew_t::type rs1 = RS1; \ + auto res = (int128_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + } \ +VI_LOOP_END // Seems that 0x0 doesn't work. #define DEBUG_START 0x100 #define DEBUG_END (0x1000 - 1) diff --git a/riscv/insns/vaadd_vi.h b/riscv/insns/vaadd_vi.h new file mode 100644 index 0000000..5f8d5f5 --- /dev/null +++ b/riscv/insns/vaadd_vi.h @@ -0,0 +1,9 @@ +// vaadd: Averaging adds of integers +VRM xrm = P.VU.get_vround_mode(); +VI_VI_LOOP +({ + int64_t result = simm5 + vs2; + INT_ROUNDING(result, xrm, 1); + result = vzext(result >> 1, sew); + vd = result; +}) diff --git a/riscv/insns/vaadd_vv.h b/riscv/insns/vaadd_vv.h new file mode 100644 index 0000000..b479970 --- /dev/null +++ b/riscv/insns/vaadd_vv.h @@ -0,0 +1,2 @@ +// vaadd.vv vd, vs2, vs1 +VI_VVX_LOOP_AVG(vs1, +); diff --git a/riscv/insns/vaadd_vx.h b/riscv/insns/vaadd_vx.h new file mode 100644 index 0000000..c811a0a --- /dev/null +++ b/riscv/insns/vaadd_vx.h @@ -0,0 +1,2 @@ +// vaadd.vx vd, vs2, rs1 +VI_VVX_LOOP_AVG(rs1, +); diff --git a/riscv/insns/vadc_vim.h b/riscv/insns/vadc_vim.h new file mode 100644 index 0000000..e21e2f8 --- /dev/null +++ b/riscv/insns/vadc_vim.h @@ -0,0 +1,11 @@ +// vadc.vim vd, vs2, simm5 +require(!(insn.rd() == 0 && P.VU.vlmul > 1)); +VI_VI_LOOP +({ + auto &v0 = P.VU.elt(0, midx); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & simm5) + (op_mask & vs2) + carry; + vd = res; +}) diff --git a/riscv/insns/vadc_vvm.h b/riscv/insns/vadc_vvm.h new file mode 100644 index 0000000..b708ac1 --- /dev/null +++ b/riscv/insns/vadc_vvm.h @@ -0,0 +1,11 @@ +// vadc.vvm vd, vs2, rs1 +require(!(insn.rd() == 0 && P.VU.vlmul > 1)); +VI_VV_LOOP +({ + auto &v0 = P.VU.elt(0, midx); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & vs1) + (op_mask & vs2) + carry; + vd = res; +}) diff --git a/riscv/insns/vadc_vxm.h b/riscv/insns/vadc_vxm.h new file mode 100644 index 0000000..6c6e6dc --- /dev/null +++ b/riscv/insns/vadc_vxm.h @@ -0,0 +1,11 @@ +// vadc.vxm vd, vs2, rs1 +require(!(insn.rd() == 0 && P.VU.vlmul > 1)); +VI_VX_LOOP +({ + auto &v0 = P.VU.elt(0, midx); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & rs1) + (op_mask & vs2) + carry; + vd = res; +}) diff --git a/riscv/insns/vadd_vi.h b/riscv/insns/vadd_vi.h new file mode 100644 index 0000000..45fc6b7 --- /dev/null +++ b/riscv/insns/vadd_vi.h @@ -0,0 +1,5 @@ +// vadd.vi vd, simm5, vs2, vm +VI_VI_LOOP +({ + vd = simm5 + vs2; +}) diff --git a/riscv/insns/vadd_vv.h b/riscv/insns/vadd_vv.h new file mode 100644 index 0000000..45c6bdc --- /dev/null +++ b/riscv/insns/vadd_vv.h @@ -0,0 +1,5 @@ +// vadd.vv vd, vs1, vs2, vm +VI_VV_LOOP +({ + vd = vs1 + vs2; +}) diff --git a/riscv/insns/vadd_vx.h b/riscv/insns/vadd_vx.h new file mode 100644 index 
0000000..33e72ee --- /dev/null +++ b/riscv/insns/vadd_vx.h @@ -0,0 +1,5 @@ +// vadd.vx vd, rs1, vs2, vm +VI_VX_LOOP +({ + vd = rs1 + vs2; +}) diff --git a/riscv/insns/vand_vi.h b/riscv/insns/vand_vi.h new file mode 100644 index 0000000..dd9618b --- /dev/null +++ b/riscv/insns/vand_vi.h @@ -0,0 +1,5 @@ +// vand.vi vd, simm5, vs2, vm +VI_VI_LOOP +({ + vd = simm5 & vs2; +}) diff --git a/riscv/insns/vand_vv.h b/riscv/insns/vand_vv.h new file mode 100644 index 0000000..65558e4 --- /dev/null +++ b/riscv/insns/vand_vv.h @@ -0,0 +1,5 @@ +// vand.vv vd, vs1, vs2, vm +VI_VV_LOOP +({ + vd = vs1 & vs2; +}) diff --git a/riscv/insns/vand_vx.h b/riscv/insns/vand_vx.h new file mode 100644 index 0000000..8eea1ed --- /dev/null +++ b/riscv/insns/vand_vx.h @@ -0,0 +1,5 @@ +// vand.vx vd, rs1, vs2, vm +VI_VX_LOOP +({ + vd = rs1 & vs2; +}) diff --git a/riscv/insns/vasub_vv.h b/riscv/insns/vasub_vv.h new file mode 100644 index 0000000..5a5ccc9 --- /dev/null +++ b/riscv/insns/vasub_vv.h @@ -0,0 +1,2 @@ +// vasub.vv vd, vs2, vs1 +VI_VVX_LOOP_AVG(vs1, -); diff --git a/riscv/insns/vasub_vx.h b/riscv/insns/vasub_vx.h new file mode 100644 index 0000000..c3cad4b --- /dev/null +++ b/riscv/insns/vasub_vx.h @@ -0,0 +1,2 @@ +// vasub.vx vd, vs2, rs1 +VI_VVX_LOOP_AVG(rs1, -); diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h new file mode 100644 index 0000000..2e0784c --- /dev/null +++ b/riscv/insns/vcompress_vm.h @@ -0,0 +1,41 @@ +// vcompress vd, vs2, vs1 +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require(!P.VU.vill); +require(P.VU.vstart == 0); +reg_t sew = P.VU.vsew; +reg_t vl = P.VU.vl; +reg_t rd_num = insn.rd(); +reg_t rs1_num = insn.rs1(); +reg_t rs2_num = insn.rs2(); +reg_t pos = 0; +for (reg_t i = P.VU.vstart ; i < vl; ++i) { + const int mlen = P.VU.vmlen; + const int midx = (mlen * i) / 64; + const int mpos = (mlen * i) % 64; + + bool do_mask = (P.VU.elt(rs1_num, midx) >> mpos) & 0x1; + if (do_mask) { + switch (sew) { + case e8: + P.VU.elt(rd_num, pos) = P.VU.elt(rs2_num, i); + break; + case e16: + P.VU.elt(rd_num, pos) = P.VU.elt(rs2_num, i); + break; + case e32: + P.VU.elt(rd_num, pos) = P.VU.elt(rs2_num, i); + break; + default: + P.VU.elt(rd_num, pos) = P.VU.elt(rs2_num, i); + break; + } + + ++pos; + } +} + +if (vl > 0 && TAIL_ZEROING) { + uint8_t *tail = &P.VU.elt(rd_num, pos * ((sew >> 3) * 1)); + memset(tail, 0, (P.VU.vlmax - pos) * ((sew >> 3) * 1)); +} + diff --git a/riscv/insns/vdiv_vv.h b/riscv/insns/vdiv_vv.h new file mode 100644 index 0000000..67da162 --- /dev/null +++ b/riscv/insns/vdiv_vv.h @@ -0,0 +1,10 @@ +// vdiv.vv vd, vs2, vs1 +VI_VV_LOOP +({ + if (vs1 == 0) + vd = -1; + else if (vs2 == -(1 << (sew - 1)) && vs1 == -1) + vd = vs2; + else + vd = vs2 / vs1; +}) diff --git a/riscv/insns/vdiv_vx.h b/riscv/insns/vdiv_vx.h new file mode 100644 index 0000000..1a152bd --- /dev/null +++ b/riscv/insns/vdiv_vx.h @@ -0,0 +1,10 @@ +// vdiv.vx vd, vs2, rs1 +VI_VX_LOOP +({ + if(rs1 == 0) + vd = -1; + else if(vs2 == -(1 << (sew - 1)) && rs1 == -1) + vd = vs2; + else + vd = vs2 / rs1; +}) diff --git a/riscv/insns/vdivu_vv.h b/riscv/insns/vdivu_vv.h new file mode 100644 index 0000000..ef6e777 --- /dev/null +++ b/riscv/insns/vdivu_vv.h @@ -0,0 +1,8 @@ +// vdivu.vv vd, vs2, vs1 +VI_VV_ULOOP +({ + if(vs1 == 0) + vd = -1; + else + vd = vs2 / vs1; +}) diff --git a/riscv/insns/vdivu_vx.h b/riscv/insns/vdivu_vx.h new file mode 100644 index 0000000..7ffe1c6 --- /dev/null +++ b/riscv/insns/vdivu_vx.h @@ -0,0 +1,8 @@ +// vdivu.vx vd, vs2, rs1 +VI_VX_ULOOP +({ + if(rs1 == 0) + vd = -1; + else + vd = 
vs2 / rs1; +}) diff --git a/riscv/insns/vdot_vv.h b/riscv/insns/vdot_vv.h new file mode 100644 index 0000000..7685230 --- /dev/null +++ b/riscv/insns/vdot_vv.h @@ -0,0 +1,5 @@ +// vdot vd, vs2, vs1 +VI_VV_LOOP +({ + vd += vs2 * vs1; +}) diff --git a/riscv/insns/vdotu_vv.h b/riscv/insns/vdotu_vv.h new file mode 100644 index 0000000..9c4c59d --- /dev/null +++ b/riscv/insns/vdotu_vv.h @@ -0,0 +1,5 @@ +// vdotu vd, vs2, vs1 +VI_VV_ULOOP +({ + vd += vs2 * vs1; +}) diff --git a/riscv/insns/vext_x_v.h b/riscv/insns/vext_x_v.h new file mode 100644 index 0000000..837cc22 --- /dev/null +++ b/riscv/insns/vext_x_v.h @@ -0,0 +1,30 @@ +// vext_x_v: rd = vs2[rs1] +require(insn.v_vm() == 1); +uint64_t xmask = UINT64_MAX >> (64 - P.get_max_xlen()); +reg_t rs1 = RS1; +VI_LOOP_BASE +VI_LOOP_END_NO_TAIL_ZERO +if (!(rs1 >= 0 && rs1 < (P.VU.get_vlen()/sew))) { + WRITE_RD(0); +} else { + switch(sew) { + case e8: + WRITE_RD(P.VU.elt(rs2_num, rs1)); + break; + case e16: + WRITE_RD(P.VU.elt(rs2_num, rs1)); + break; + case e32: + if (P.get_max_xlen() == 32) + WRITE_RD(P.VU.elt(rs2_num, rs1)); + else + WRITE_RD(P.VU.elt(rs2_num, rs1)); + break; + case e64: + if (P.get_max_xlen() <= sew) + WRITE_RD(P.VU.elt(rs2_num, rs1) & xmask); + else + WRITE_RD(P.VU.elt(rs2_num, rs1)); + break; + } +} diff --git a/riscv/insns/vid_v.h b/riscv/insns/vid_v.h new file mode 100644 index 0000000..ac111d0 --- /dev/null +++ b/riscv/insns/vid_v.h @@ -0,0 +1,30 @@ +// vmpopc rd, vs2, vm +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require(!P.VU.vill); +reg_t vl = P.VU.vl; +reg_t sew = P.VU.vsew; +reg_t rd_num = insn.rd(); +reg_t rs1_num = insn.rs1(); +reg_t rs2_num = insn.rs2(); + +for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) { + VI_LOOP_ELEMENT_SKIP(); + + switch (sew) { + case e8: + P.VU.elt(rd_num, i) = i; + break; + case e16: + P.VU.elt(rd_num, i) = i; + break; + case e32: + P.VU.elt(rd_num, i) = i; + break; + default: + P.VU.elt(rd_num, i) = i; + break; + } +} + +VI_TAIL_ZERO(1); +P.VU.vstart = 0; diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h new file mode 100644 index 0000000..c7b831a --- /dev/null +++ b/riscv/insns/viota_m.h @@ -0,0 +1,52 @@ +// vmpopc rd, vs2, vm +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require(!P.VU.vill); +reg_t vl = P.VU.vl; +reg_t sew = P.VU.vsew; +reg_t rd_num = insn.rd(); +reg_t rs1_num = insn.rs1(); +reg_t rs2_num = insn.rs2(); +require(P.VU.vstart == 0); + +int cnt = 0; +for (reg_t i = 0; i < vl; ++i) { + const int mlen = P.VU.vmlen; + const int midx = (mlen * i) / 64; + const int mpos = (mlen * i) % 64; + + bool vs2_lsb = ((P.VU.elt(rs2_num, midx) >> mpos) & 0x1) == 1; + bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1; + + bool has_one = false; + if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { + if (vs2_lsb) { + has_one = true; + } + } + + bool use_ori = (insn.v_vm() == 0) && !do_mask; + switch (sew) { + case e8: + P.VU.elt(rd_num, i) = use_ori ? + P.VU.elt(rd_num, i) : cnt; + break; + case e16: + P.VU.elt(rd_num, i) = use_ori ? + P.VU.elt(rd_num, i) : cnt; + break; + case e32: + P.VU.elt(rd_num, i) = use_ori ? + P.VU.elt(rd_num, i) : cnt; + break; + default: + P.VU.elt(rd_num, i) = use_ori ? 
+ P.VU.elt(rd_num, i) : cnt; + break; + } + + if (has_one) { + cnt++; + } +} + +VI_TAIL_ZERO(1); diff --git a/riscv/insns/vmacc_vv.h b/riscv/insns/vmacc_vv.h new file mode 100644 index 0000000..e6ec93f --- /dev/null +++ b/riscv/insns/vmacc_vv.h @@ -0,0 +1,5 @@ +// vmacc.vv: vd[i] = +(vs1[i] * vs2[i]) + vd[i] +VI_VV_LOOP +({ + vd = vs1 * vs2 + vd; +}) diff --git a/riscv/insns/vmacc_vx.h b/riscv/insns/vmacc_vx.h new file mode 100644 index 0000000..d40b264 --- /dev/null +++ b/riscv/insns/vmacc_vx.h @@ -0,0 +1,5 @@ +// vmacc.vx: vd[i] = +(x[rs1] * vs2[i]) + vd[i] +VI_VX_LOOP +({ + vd = rs1 * vs2 + vd; +}) diff --git a/riscv/insns/vmadc_vim.h b/riscv/insns/vmadc_vim.h new file mode 100644 index 0000000..fd79089 --- /dev/null +++ b/riscv/insns/vmadc_vim.h @@ -0,0 +1,14 @@ +// vmadc.vim vd, vs2, simm5 +require(!(insn.rd() == 0 && P.VU.vlmul > 1)); +VI_XI_LOOP_CARRY +({ + auto v0 = P.VU.elt(0, midx); + const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & simm5) + (op_mask & vs2) + carry; + + carry = (res >> sew) & 0x1u; + vd = (vd & ~mmask) | ((carry << mpos) & mmask); +}) diff --git a/riscv/insns/vmadc_vvm.h b/riscv/insns/vmadc_vvm.h new file mode 100644 index 0000000..82042ca --- /dev/null +++ b/riscv/insns/vmadc_vvm.h @@ -0,0 +1,14 @@ +// vmadc.vvm vd, vs2, rs1 +require(!(insn.rd() == 0 && P.VU.vlmul > 1)); +VI_VV_LOOP_CARRY +({ + auto v0 = P.VU.elt(0, midx); + const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & vs1) + (op_mask & vs2) + carry; + + carry = (res >> sew) & 0x1u; + vd = (vd & ~mmask) | ((carry << mpos) & mmask); +}) diff --git a/riscv/insns/vmadc_vxm.h b/riscv/insns/vmadc_vxm.h new file mode 100644 index 0000000..8f26584 --- /dev/null +++ b/riscv/insns/vmadc_vxm.h @@ -0,0 +1,14 @@ +// vadc.vx vd, vs2, rs1 +require(!(insn.rd() == 0 && P.VU.vlmul > 1)); +VI_XI_LOOP_CARRY +({ + auto v0 = P.VU.elt(0, midx); + const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & rs1) + (op_mask & vs2) + carry; + + carry = (res >> sew) & 0x1u; + vd = (vd & ~mmask) | ((carry << mpos) & mmask); +}) diff --git a/riscv/insns/vmadd_vv.h b/riscv/insns/vmadd_vv.h new file mode 100644 index 0000000..a1c0d2e --- /dev/null +++ b/riscv/insns/vmadd_vv.h @@ -0,0 +1,5 @@ +// vmadd: vd[i] = (vd[i] * vs1[i]) + vs2[i] +VI_VV_LOOP +({ + vd = vd * vs1 + vs2; +}) diff --git a/riscv/insns/vmadd_vx.h b/riscv/insns/vmadd_vx.h new file mode 100644 index 0000000..1a8a001 --- /dev/null +++ b/riscv/insns/vmadd_vx.h @@ -0,0 +1,5 @@ +// vmadd: vd[i] = (vd[i] * x[rs1]) + vs2[i] +VI_VX_LOOP +({ + vd = vd * rs1 + vs2; +}) diff --git a/riscv/insns/vmand_mm.h b/riscv/insns/vmand_mm.h new file mode 100644 index 0000000..04615c6 --- /dev/null +++ b/riscv/insns/vmand_mm.h @@ -0,0 +1,2 @@ +// vmand.mm vd, vs2, vs1 +VI_LOOP_MASK(vs2 & vs1); diff --git a/riscv/insns/vmandnot_mm.h b/riscv/insns/vmandnot_mm.h new file mode 100644 index 0000000..4c26469 --- /dev/null +++ b/riscv/insns/vmandnot_mm.h @@ -0,0 +1,2 @@ +// vmandnot.mm vd, vs2, vs1 +VI_LOOP_MASK(vs2 & ~vs1); diff --git a/riscv/insns/vmax_vv.h b/riscv/insns/vmax_vv.h new file mode 100644 index 0000000..b9f15c5 --- /dev/null +++ b/riscv/insns/vmax_vv.h 
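/*
 * Reference sketch (not from the patch): the INT_ROUNDING macro added to
 * decode.h above implements the four vxrm rounding modes of v-spec 0.7.1
 * (RNU, RNE, RDN, ROD) by adjusting a double-width intermediate before the
 * caller shifts right by `gb` guard bits, as vaadd/vasub/vsmul/vssra/vnclip
 * do. A compact, self-contained version of the intended behaviour, with a
 * hypothetical helper name:
 *
 *   static inline int64_t vround_ref(int64_t v, int gb, int vxrm) {
 *     if (gb == 0) return v;
 *     int64_t r = v >> gb;                                  // RDN: truncate toward -inf
 *     int64_t guard = (v >> (gb - 1)) & 1;                  // MSB of the dropped bits
 *     int64_t rest = v & ((INT64_C(1) << (gb - 1)) - 1);    // remaining dropped bits
 *     switch (vxrm) {
 *       case 0: r += guard; break;                          // RNU: nearest, ties up
 *       case 1: r += guard & ((rest != 0) | (r & 1)); break; // RNE: nearest, ties to even
 *       case 3: r |= guard | (rest != 0); break;            // ROD: OR dropped bits into LSB
 *     }
 *     return r;
 *   }
 *
 * Note that the macro's RNE and ROD arms differ from this reference in corner
 * cases (its RNE compare uses unshifted constants, and its ROD sets the bit
 * unconditionally), so treat the sketch as the spec's intent, not as a copy
 * of the code.
 */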
@@ -0,0 +1,10 @@ +// vmax.vv vd, vs2, vs1, vm # Vector-vector +VI_VV_LOOP +({ + if (vs1 >= vs2) { + vd = vs1; + } else { + vd = vs2; + } + +}) diff --git a/riscv/insns/vmax_vx.h b/riscv/insns/vmax_vx.h new file mode 100644 index 0000000..06f3f43 --- /dev/null +++ b/riscv/insns/vmax_vx.h @@ -0,0 +1,10 @@ +// vmax.vx vd, vs2, rs1, vm # vector-scalar +VI_VX_LOOP +({ + if (rs1 >= vs2) { + vd = rs1; + } else { + vd = vs2; + } + +}) diff --git a/riscv/insns/vmaxu_vv.h b/riscv/insns/vmaxu_vv.h new file mode 100644 index 0000000..4e6868d --- /dev/null +++ b/riscv/insns/vmaxu_vv.h @@ -0,0 +1,9 @@ +// vmaxu.vv vd, vs2, vs1, vm # Vector-vector +VI_VV_ULOOP +({ + if (vs1 >= vs2) { + vd = vs1; + } else { + vd = vs2; + } +}) diff --git a/riscv/insns/vmaxu_vx.h b/riscv/insns/vmaxu_vx.h new file mode 100644 index 0000000..cab8918 --- /dev/null +++ b/riscv/insns/vmaxu_vx.h @@ -0,0 +1,9 @@ +// vmaxu.vx vd, vs2, rs1, vm # vector-scalar +VI_VX_ULOOP +({ + if (rs1 >= vs2) { + vd = rs1; + } else { + vd = vs2; + } +}) diff --git a/riscv/insns/vmerge_vim.h b/riscv/insns/vmerge_vim.h new file mode 100644 index 0000000..13354d6 --- /dev/null +++ b/riscv/insns/vmerge_vim.h @@ -0,0 +1,9 @@ +// vmerge.vim vd, vs2, simm5 +VI_VVXI_MERGE_LOOP +({ + int midx = (P.VU.vmlen * i) / 64; + int mpos = (P.VU.vmlen * i) % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? simm5 : vs2; +}) diff --git a/riscv/insns/vmerge_vvm.h b/riscv/insns/vmerge_vvm.h new file mode 100644 index 0000000..7530b40 --- /dev/null +++ b/riscv/insns/vmerge_vvm.h @@ -0,0 +1,9 @@ +// vmerge.vvm vd, vs2, vs1 +VI_VVXI_MERGE_LOOP +({ + int midx = (P.VU.vmlen * i) / 64; + int mpos = (P.VU.vmlen * i) % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? vs1 : vs2; +}) diff --git a/riscv/insns/vmerge_vxm.h b/riscv/insns/vmerge_vxm.h new file mode 100644 index 0000000..b1757fa --- /dev/null +++ b/riscv/insns/vmerge_vxm.h @@ -0,0 +1,9 @@ +// vmerge.vxm vd, vs2, rs1 +VI_VVXI_MERGE_LOOP +({ + int midx = (P.VU.vmlen * i) / 64; + int mpos = (P.VU.vmlen * i) % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? 
diff --git a/riscv/insns/vmerge_vxm.h b/riscv/insns/vmerge_vxm.h
new file mode 100644
index 0000000..b1757fa
--- /dev/null
+++ b/riscv/insns/vmerge_vxm.h
@@ -0,0 +1,9 @@
+// vmerge.vxm vd, vs2, rs1
+VI_VVXI_MERGE_LOOP
+({
+  int midx = (P.VU.vmlen * i) / 64;
+  int mpos = (P.VU.vmlen * i) % 64;
+  bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1;
+
+  vd = use_first ? rs1 : vs2;
+})
diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h
new file mode 100644
index 0000000..cedf4b9
--- /dev/null
+++ b/riscv/insns/vmfeq_vf.h
@@ -0,0 +1,5 @@
+// vmfeq.vf vd, vs2, fs1
+VI_VFP_LOOP_CMP
+({
+  res = f32_eq(vs2, rs1);
+})
diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h
new file mode 100644
index 0000000..7e76cac
--- /dev/null
+++ b/riscv/insns/vmfeq_vv.h
@@ -0,0 +1,5 @@
+// vmfeq.vv vd, vs2, vs1
+VI_VFP_LOOP_CMP
+({
+  res = f32_eq(vs2, vs1);
+})
diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h
new file mode 100644
index 0000000..7eade89
--- /dev/null
+++ b/riscv/insns/vmfge_vf.h
@@ -0,0 +1,5 @@
+// vmfge.vf vd, vs2, rs1
+VI_VFP_LOOP_CMP
+({
+  res = f32_le_quiet(rs1, vs2);
+})
diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h
new file mode 100644
index 0000000..6115d06
--- /dev/null
+++ b/riscv/insns/vmfgt_vf.h
@@ -0,0 +1,5 @@
+// vmfgt.vf vd, vs2, rs1
+VI_VFP_LOOP_CMP
+({
+  res = f32_lt_quiet(rs1, vs2);
+})
diff --git a/riscv/insns/vmfirst_m.h b/riscv/insns/vmfirst_m.h
new file mode 100644
index 0000000..8c216c0
--- /dev/null
+++ b/riscv/insns/vmfirst_m.h
@@ -0,0 +1,20 @@
+// vmfirst.m rd, vs2
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs2_num = insn.rs2();
+require(P.VU.vstart == 0);
+reg_t pos = -1;
+for (reg_t i = P.VU.vstart; i < vl; ++i) {
+  VI_LOOP_ELEMENT_SKIP()
+
+  bool vs2_lsb = ((P.VU.elt(rs2_num, midx) >> mpos) & 0x1) == 1;
+  if (vs2_lsb) {
+    pos = i;
+    break;
+  }
+}
+P.VU.vstart = 0;
+WRITE_RD(pos);
diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h
new file mode 100644
index 0000000..998b93b
--- /dev/null
+++ b/riscv/insns/vmfle_vf.h
@@ -0,0 +1,5 @@
+// vmfle.vf vd, vs2, rs1
+VI_VFP_LOOP_CMP
+({
+  res = f32_le(vs2, rs1);
+})
diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h
new file mode 100644
index 0000000..c716312
--- /dev/null
+++ b/riscv/insns/vmfle_vv.h
@@ -0,0 +1,5 @@
+// vmfle.vv vd, vs2, vs1
+VI_VFP_LOOP_CMP
+({
+  res = f32_le_quiet(vs2, vs1);
+})
diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h
new file mode 100644
index 0000000..af436e4
--- /dev/null
+++ b/riscv/insns/vmflt_vf.h
@@ -0,0 +1,5 @@
+// vmflt.vf vd, vs2, rs1
+VI_VFP_LOOP_CMP
+({
+  res = f32_lt_quiet(vs2, rs1);
+})
diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h
new file mode 100644
index 0000000..ded867d
--- /dev/null
+++ b/riscv/insns/vmflt_vv.h
@@ -0,0 +1,5 @@
+// vmflt.vv vd, vs2, vs1
+VI_VFP_LOOP_CMP
+({
+  res = f32_lt_quiet(vs2, vs1);
+})
diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h
new file mode 100644
index 0000000..ac2eced
--- /dev/null
+++ b/riscv/insns/vmfne_vf.h
@@ -0,0 +1,5 @@
+// vmfne.vf vd, vs2, rs1
+VI_VFP_LOOP_CMP
+({
+  res = !f32_eq(vs2, rs1);
+})
diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h
new file mode 100644
index 0000000..3fa8beb
--- /dev/null
+++ b/riscv/insns/vmfne_vv.h
@@ -0,0 +1,5 @@
+// vmfne.vv vd, vs2, vs1
+VI_VFP_LOOP_CMP
+({
+  res = !f32_eq(vs2, vs1);
+})
diff --git a/riscv/insns/vmford_vf.h b/riscv/insns/vmford_vf.h
new file mode 100644
index 0000000..b5e74f2
--- /dev/null
+++ b/riscv/insns/vmford_vf.h
@@ -0,0 +1,5 @@
+// vmford.vf vd, vs2, rs1, vm
+VI_VFP_LOOP_CMP
+({
+  res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(rs1));
+})
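vmford.* above reports "ordered" by checking that neither input is a signaling NaN. In IEEE 754 terms, ordered usually means neither operand is any NaN, quiet or signaling; the classic self-comparison test expresses that. A standalone sketch with plain C floats rather than softfloat types (illustrative only):

    #include <stdbool.h>

    /* IEEE 754: a comparison is "ordered" iff neither operand is NaN.
       NaN is the only value that compares unequal to itself. */
    static bool ordered(float a, float b) {
      return (a == a) && (b == b);
    }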
diff --git a/riscv/insns/vmford_vv.h b/riscv/insns/vmford_vv.h
new file mode 100644
index 0000000..2e459c1
--- /dev/null
+++ b/riscv/insns/vmford_vv.h
@@ -0,0 +1,5 @@
+// vmford.vv vd, vs2, vs1, vm
+VI_VFP_LOOP_CMP
+({
+  res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(vs1));
+})
diff --git a/riscv/insns/vmin_vv.h b/riscv/insns/vmin_vv.h
new file mode 100644
index 0000000..21da0b3
--- /dev/null
+++ b/riscv/insns/vmin_vv.h
@@ -0,0 +1,11 @@
+// vmin.vv vd, vs2, vs1, vm # Vector-vector
+VI_VV_LOOP
+({
+  if (vs1 <= vs2) {
+    vd = vs1;
+  } else {
+    vd = vs2;
+  }
+
+
+})
diff --git a/riscv/insns/vmin_vx.h b/riscv/insns/vmin_vx.h
new file mode 100644
index 0000000..3291776
--- /dev/null
+++ b/riscv/insns/vmin_vx.h
@@ -0,0 +1,11 @@
+// vmin.vx vd, vs2, rs1, vm # vector-scalar
+VI_VX_LOOP
+({
+  if (rs1 <= vs2) {
+    vd = rs1;
+  } else {
+    vd = vs2;
+  }
+
+
+})
diff --git a/riscv/insns/vminu_vv.h b/riscv/insns/vminu_vv.h
new file mode 100644
index 0000000..c0ab195
--- /dev/null
+++ b/riscv/insns/vminu_vv.h
@@ -0,0 +1,9 @@
+// vminu.vv vd, vs2, vs1, vm # Vector-vector
+VI_VV_ULOOP
+({
+  if (vs1 <= vs2) {
+    vd = vs1;
+  } else {
+    vd = vs2;
+  }
+})
diff --git a/riscv/insns/vminu_vx.h b/riscv/insns/vminu_vx.h
new file mode 100644
index 0000000..1055895
--- /dev/null
+++ b/riscv/insns/vminu_vx.h
@@ -0,0 +1,10 @@
+// vminu.vx vd, vs2, rs1, vm # vector-scalar
+VI_VX_ULOOP
+({
+  if (rs1 <= vs2) {
+    vd = rs1;
+  } else {
+    vd = vs2;
+  }
+
+})
diff --git a/riscv/insns/vmnand_mm.h b/riscv/insns/vmnand_mm.h
new file mode 100644
index 0000000..5a3ab09
--- /dev/null
+++ b/riscv/insns/vmnand_mm.h
@@ -0,0 +1,2 @@
+// vmnand.mm vd, vs2, vs1
+VI_LOOP_MASK(~(vs2 & vs1));
diff --git a/riscv/insns/vmnor_mm.h b/riscv/insns/vmnor_mm.h
new file mode 100644
index 0000000..ab93378
--- /dev/null
+++ b/riscv/insns/vmnor_mm.h
@@ -0,0 +1,2 @@
+// vmnor.mm vd, vs2, vs1
+VI_LOOP_MASK(~(vs2 | vs1));
diff --git a/riscv/insns/vmor_mm.h b/riscv/insns/vmor_mm.h
new file mode 100644
index 0000000..32e71b9
--- /dev/null
+++ b/riscv/insns/vmor_mm.h
@@ -0,0 +1,2 @@
+// vmor.mm vd, vs2, vs1
+VI_LOOP_MASK(vs2 | vs1);
diff --git a/riscv/insns/vmornot_mm.h b/riscv/insns/vmornot_mm.h
new file mode 100644
index 0000000..bdc1d8b
--- /dev/null
+++ b/riscv/insns/vmornot_mm.h
@@ -0,0 +1,2 @@
+// vmornot.mm vd, vs2, vs1
+VI_LOOP_MASK(vs2 | ~vs1);
diff --git a/riscv/insns/vmpopc_m.h b/riscv/insns/vmpopc_m.h
new file mode 100644
index 0000000..9e22b2b
--- /dev/null
+++ b/riscv/insns/vmpopc_m.h
@@ -0,0 +1,24 @@
+// vmpopc.m rd, vs2, vm
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs2_num = insn.rs2();
+require(P.VU.vstart == 0);
+reg_t popcount = 0;
+for (reg_t i = P.VU.vstart; i < vl; ++i) {
+  const int mlen = P.VU.vmlen;
+  const int midx = (mlen * i) / 64;
+  const int mpos = (mlen * i) % 64;
+
+  bool vs2_lsb = ((P.VU.elt(rs2_num, midx) >> mpos) & 0x1) == 1;
+  if (insn.v_vm() == 1) {
+    popcount += vs2_lsb;
+  } else {
+    bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1;
+    popcount += (vs2_lsb && do_mask);
+  }
+}
+P.VU.vstart = 0;
+WRITE_RD(popcount);
diff --git a/riscv/insns/vmsbc_vvm.h b/riscv/insns/vmsbc_vvm.h
new file mode 100644
index 0000000..3804ba8
--- /dev/null
+++ b/riscv/insns/vmsbc_vvm.h
@@ -0,0 +1,14 @@
+// vmsbc.vvm vd, vs2, vs1
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_VV_LOOP_CARRY
+({
+  auto v0 = P.VU.elt(0, midx);
+  const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
+  const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+  uint64_t carry = (v0 >> mpos) & 0x1;
+
+  uint128_t res = (op_mask & vs1) - (op_mask & vs2) - carry;
+
+  carry = (res >> sew) & 0x1u;
+  vd = (vd & ~mmask) | ((carry << mpos) & mmask);
+})
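vmpopc and vmfirst above reduce the mask register to a scalar: a population count of the set mask bits and the index of the first set bit (or -1 when none is set). Both walk the mask with the same midx/mpos bit addressing; a standalone C sketch over a plain bit array (illustrative only):

    #include <stdint.h>

    /* First set element of a vl-element mask, or -1 if none (sketch). */
    static int64_t first_set(const uint64_t *m, int vl, int vmlen) {
      for (int i = 0; i < vl; ++i)
        if ((m[(vmlen * i) / 64] >> ((vmlen * i) % 64)) & 1)
          return i;
      return -1;
    }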
diff --git a/riscv/insns/vmsbc_vxm.h b/riscv/insns/vmsbc_vxm.h
new file mode 100644
index 0000000..d5332f5
--- /dev/null
+++ b/riscv/insns/vmsbc_vxm.h
@@ -0,0 +1,14 @@
+// vmsbc.vxm vd, vs2, rs1
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_XI_LOOP_CARRY
+({
+  auto &v0 = P.VU.elt(0, midx);
+  const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
+  const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+  uint64_t carry = (v0 >> mpos) & 0x1;
+
+  uint128_t res = (op_mask & rs1) - (op_mask & vs2) - carry;
+
+  carry = (res >> sew) & 0x1u;
+  vd = (vd & ~mmask) | ((carry << mpos) & mmask);
+})
diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h
new file mode 100644
index 0000000..a014900
--- /dev/null
+++ b/riscv/insns/vmsbf_m.h
@@ -0,0 +1,34 @@
+// vmsbf.m vd, vs2, vm
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+
+bool has_one = false;
+for (reg_t i = P.VU.vstart; i < vl; ++i) {
+  const int mlen = P.VU.vmlen;
+  const int midx = (mlen * i) / 64;
+  const int mpos = (mlen * i) % 64;
+  const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
+
+  bool vs2_lsb = ((P.VU.elt(rs2_num, midx) >> mpos) & 0x1) == 1;
+  bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1;
+  auto &vd = P.VU.elt(rd_num, midx);
+
+
+  if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
+    uint64_t res = 0;
+    if (!has_one && !vs2_lsb) {
+      res = 1;
+    } else if (!has_one && vs2_lsb) {
+      has_one = true;
+    }
+    vd = (vd & ~mmask) | ((res << mpos) & mmask);
+  }
+}
+
+VI_TAIL_ZERO_MASK(rd_num);
+P.VU.vstart = 0;
diff --git a/riscv/insns/vmseq_vi.h b/riscv/insns/vmseq_vi.h
new file mode 100644
index 0000000..cfc1682
--- /dev/null
+++ b/riscv/insns/vmseq_vi.h
@@ -0,0 +1,5 @@
+// vmseq.vi vd, vs2, simm5
+VI_VI_LOOP_CMP
+({
+  res = simm5 == vs2;
+})
diff --git a/riscv/insns/vmseq_vv.h b/riscv/insns/vmseq_vv.h
new file mode 100644
index 0000000..91fd204
--- /dev/null
+++ b/riscv/insns/vmseq_vv.h
@@ -0,0 +1,6 @@
+// vmseq.vv vd, vs2, vs1
+VI_VV_LOOP_CMP
+({
+  res = vs2 == vs1;
+})
+
diff --git a/riscv/insns/vmseq_vx.h b/riscv/insns/vmseq_vx.h
new file mode 100644
index 0000000..ab63323
--- /dev/null
+++ b/riscv/insns/vmseq_vx.h
@@ -0,0 +1,5 @@
+// vmseq.vx vd, vs2, rs1
+VI_VX_LOOP_CMP
+({
+  res = rs1 == vs2;
+})
diff --git a/riscv/insns/vmsgt_vi.h b/riscv/insns/vmsgt_vi.h
new file mode 100644
index 0000000..4f7dea8
--- /dev/null
+++ b/riscv/insns/vmsgt_vi.h
@@ -0,0 +1,5 @@
+// vmsgt.vi vd, vs2, simm5
+VI_VI_LOOP_CMP
+({
+  res = vs2 > simm5;
+})
diff --git a/riscv/insns/vmsgt_vx.h b/riscv/insns/vmsgt_vx.h
new file mode 100644
index 0000000..5f24db6
--- /dev/null
+++ b/riscv/insns/vmsgt_vx.h
@@ -0,0 +1,5 @@
+// vmsgt.vx vd, vs2, rs1
+VI_VX_LOOP_CMP
+({
+  res = vs2 > rs1;
+})
diff --git a/riscv/insns/vmsgtu_vi.h b/riscv/insns/vmsgtu_vi.h
new file mode 100644
index 0000000..268d437
--- /dev/null
+++ b/riscv/insns/vmsgtu_vi.h
@@ -0,0 +1,5 @@
+// vmsgtu.vi vd, vs2, zimm5
+VI_VI_ULOOP_CMP
+({
+  res = vs2 > simm5;
+})
diff --git a/riscv/insns/vmsgtu_vx.h b/riscv/insns/vmsgtu_vx.h
new file mode 100644
index 0000000..7f39800
--- /dev/null
+++ b/riscv/insns/vmsgtu_vx.h
@@ -0,0 +1,5 @@
+// vmsgtu.vx vd, vs2, rs1
+VI_VX_ULOOP_CMP
+({
+  res = vs2 > rs1;
+})
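vmsbf.m above (and vmsif.m/vmsof.m below) differ only in how the first set source bit is treated: set-before-first writes 1s strictly before it, set-including-first also at it, set-only-first only at it. For the source mask 00010100 (element 0 on the right) the results are 00000011, 00000111 and 00000100 respectively. A standalone sketch over a mask held in one word (illustrative only):

    #include <stdint.h>

    /* Set-before-first (vmsbf) over a small mask in one word.
       vmsif would also set the bit where b is first seen;
       vmsof would set only that bit. */
    static uint64_t msbf(uint64_t src, int vl) {
      uint64_t out = 0;
      int seen = 0;
      for (int i = 0; i < vl; ++i) {
        int b = (src >> i) & 1;
        if (b) seen = 1;
        if (!seen) out |= 1ull << i;
      }
      return out;
    }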
diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h
new file mode 100644
index 0000000..144b67c
--- /dev/null
+++ b/riscv/insns/vmsif_m.h
@@ -0,0 +1,34 @@
+// vmsif.m vd, vs2, vm
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+
+bool has_one = false;
+for (reg_t i = P.VU.vstart; i < vl; ++i) {
+  const int mlen = P.VU.vmlen;
+  const int midx = (mlen * i) / 64;
+  const int mpos = (mlen * i) % 64;
+  const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
+
+  bool vs2_lsb = ((P.VU.elt(rs2_num, midx) >> mpos) & 0x1) == 1;
+  bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1;
+  auto &vd = P.VU.elt(rd_num, midx);
+
+  if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
+    uint64_t res = 0;
+    if (!has_one && !vs2_lsb) {
+      res = 1;
+    } else if (!has_one && vs2_lsb) {
+      has_one = true;
+      res = 1;
+    }
+    vd = (vd & ~mmask) | ((res << mpos) & mmask);
+  }
+}
+
+VI_TAIL_ZERO_MASK(rd_num);
+P.VU.vstart = 0;
diff --git a/riscv/insns/vmsle_vi.h b/riscv/insns/vmsle_vi.h
new file mode 100644
index 0000000..f0f67d0
--- /dev/null
+++ b/riscv/insns/vmsle_vi.h
@@ -0,0 +1,5 @@
+// vmsle.vi vd, vs2, simm5
+VI_VI_LOOP_CMP
+({
+  res = vs2 <= simm5;
+})
diff --git a/riscv/insns/vmsle_vv.h b/riscv/insns/vmsle_vv.h
new file mode 100644
index 0000000..30aba06
--- /dev/null
+++ b/riscv/insns/vmsle_vv.h
@@ -0,0 +1,5 @@
+// vmsle.vv vd, vs2, vs1
+VI_VV_LOOP_CMP
+({
+  res = vs2 <= vs1;
+})
diff --git a/riscv/insns/vmsle_vx.h b/riscv/insns/vmsle_vx.h
new file mode 100644
index 0000000..c26d596
--- /dev/null
+++ b/riscv/insns/vmsle_vx.h
@@ -0,0 +1,5 @@
+// vmsle.vx vd, vs2, rs1
+VI_VX_LOOP_CMP
+({
+  res = vs2 <= rs1;
+})
diff --git a/riscv/insns/vmsleu_vi.h b/riscv/insns/vmsleu_vi.h
new file mode 100644
index 0000000..dc4fd18
--- /dev/null
+++ b/riscv/insns/vmsleu_vi.h
@@ -0,0 +1,5 @@
+// vmsleu.vi vd, vs2, zimm5
+VI_VI_ULOOP_CMP
+({
+  res = vs2 <= simm5;
+})
diff --git a/riscv/insns/vmsleu_vv.h b/riscv/insns/vmsleu_vv.h
new file mode 100644
index 0000000..0e46032
--- /dev/null
+++ b/riscv/insns/vmsleu_vv.h
@@ -0,0 +1,5 @@
+// vmsleu.vv vd, vs2, vs1
+VI_VV_ULOOP_CMP
+({
+  res = vs2 <= vs1;
+})
diff --git a/riscv/insns/vmsleu_vx.h b/riscv/insns/vmsleu_vx.h
new file mode 100644
index 0000000..935b176
--- /dev/null
+++ b/riscv/insns/vmsleu_vx.h
@@ -0,0 +1,5 @@
+// vmsleu.vx vd, vs2, rs1
+VI_VX_ULOOP_CMP
+({
+  res = vs2 <= rs1;
+})
diff --git a/riscv/insns/vmslt_vv.h b/riscv/insns/vmslt_vv.h
new file mode 100644
index 0000000..71e6f87
--- /dev/null
+++ b/riscv/insns/vmslt_vv.h
@@ -0,0 +1,5 @@
+// vmslt.vv vd, vs2, vs1
+VI_VV_LOOP_CMP
+({
+  res = vs2 < vs1;
+})
diff --git a/riscv/insns/vmslt_vx.h b/riscv/insns/vmslt_vx.h
new file mode 100644
index 0000000..b32bb14
--- /dev/null
+++ b/riscv/insns/vmslt_vx.h
@@ -0,0 +1,5 @@
+// vmslt.vx vd, vs2, rs1
+VI_VX_LOOP_CMP
+({
+  res = vs2 < rs1;
+})
diff --git a/riscv/insns/vmsltu_vv.h b/riscv/insns/vmsltu_vv.h
new file mode 100644
index 0000000..53a570a
--- /dev/null
+++ b/riscv/insns/vmsltu_vv.h
@@ -0,0 +1,5 @@
+// vmsltu.vv vd, vs2, vs1
+VI_VV_ULOOP_CMP
+({
+  res = vs2 < vs1;
+})
diff --git a/riscv/insns/vmsltu_vx.h b/riscv/insns/vmsltu_vx.h
new file mode 100644
index 0000000..8082544
--- /dev/null
+++ b/riscv/insns/vmsltu_vx.h
@@ -0,0 +1,5 @@
+// vmsltu.vx vd, vs2, rs1
+VI_VX_ULOOP_CMP
+({
+  res = vs2 < rs1;
+})
diff --git a/riscv/insns/vmsne_vi.h b/riscv/insns/vmsne_vi.h
new file mode 100644
index 0000000..5e9758e
--- /dev/null
+++ b/riscv/insns/vmsne_vi.h
@@ -0,0 +1,5 @@
+// vmsne.vi vd, vs2, simm5
+VI_VI_LOOP_CMP
+({
+  res = vs2 != simm5;
+})
diff --git a/riscv/insns/vmsne_vv.h b/riscv/insns/vmsne_vv.h
new file mode 100644
index 0000000..e6a7174
--- /dev/null
+++ b/riscv/insns/vmsne_vv.h
@@ -0,0 +1,5 @@
+// vmsne.vv vd, vs2, vs1
+VI_VV_LOOP_CMP
+({
+  res = vs2 != vs1;
+})
diff --git a/riscv/insns/vmsne_vx.h b/riscv/insns/vmsne_vx.h
new file mode 100644
index 0000000..9e4c155
--- /dev/null
+++ b/riscv/insns/vmsne_vx.h
@@ -0,0 +1,5 @@
+// vmsne.vx vd, vs2, rs1
+VI_VX_LOOP_CMP
+({
+  res = vs2 != rs1;
+})
diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h
new file mode 100644
index 0000000..b4cbbce
--- /dev/null
+++ b/riscv/insns/vmsof_m.h
@@ -0,0 +1,32 @@
+// vmsof.m vd, vs2, vm
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+
+bool has_one = false;
+for (reg_t i = P.VU.vstart; i < vl; ++i) {
+  const int mlen = P.VU.vmlen;
+  const int midx = (mlen * i) / 64;
+  const int mpos = (mlen * i) % 64;
+  const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
+
+  bool vs2_lsb = ((P.VU.elt(rs2_num, midx) >> mpos) & 0x1) == 1;
+  bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1;
+  uint64_t &vd = P.VU.elt(rd_num, midx);
+
+  if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
+    uint64_t res = 0;
+    if (!has_one && vs2_lsb) {
+      has_one = true;
+      res = 1;
+    }
+    vd = (vd & ~mmask) | ((res << mpos) & mmask);
+  }
+}
+
+VI_TAIL_ZERO_MASK(rd_num);
+P.VU.vstart = 0;
diff --git a/riscv/insns/vmul_vv.h b/riscv/insns/vmul_vv.h
new file mode 100644
index 0000000..a327817
--- /dev/null
+++ b/riscv/insns/vmul_vv.h
@@ -0,0 +1,5 @@
+// vmul vd, vs2, vs1
+VI_VV_LOOP
+({
+  vd = vs2 * vs1;
+})
diff --git a/riscv/insns/vmul_vx.h b/riscv/insns/vmul_vx.h
new file mode 100644
index 0000000..8d68390
--- /dev/null
+++ b/riscv/insns/vmul_vx.h
@@ -0,0 +1,5 @@
+// vmul vd, vs2, rs1
+VI_VX_LOOP
+({
+  vd = vs2 * rs1;
+})
diff --git a/riscv/insns/vmulh_vv.h b/riscv/insns/vmulh_vv.h
new file mode 100644
index 0000000..e861a33
--- /dev/null
+++ b/riscv/insns/vmulh_vv.h
@@ -0,0 +1,5 @@
+// vmulh vd, vs2, vs1
+VI_VV_LOOP
+({
+  vd = ((int128_t)vs2 * vs1) >> sew;
+})
diff --git a/riscv/insns/vmulh_vx.h b/riscv/insns/vmulh_vx.h
new file mode 100644
index 0000000..b6b5503
--- /dev/null
+++ b/riscv/insns/vmulh_vx.h
@@ -0,0 +1,5 @@
+// vmulh vd, vs2, rs1
+VI_VX_LOOP
+({
+  vd = ((int128_t)vs2 * rs1) >> sew;
+})
diff --git a/riscv/insns/vmulhsu_vv.h b/riscv/insns/vmulhsu_vv.h
new file mode 100644
index 0000000..59882da
--- /dev/null
+++ b/riscv/insns/vmulhsu_vv.h
@@ -0,0 +1,37 @@
+// vmulhsu.vv vd, vs2, vs1
+VI_LOOP_BASE
+switch(sew) {
+case e8: {
+  auto &vd = P.VU.elt(rd_num, i);
+  auto vs2 = P.VU.elt(rs2_num, i);
+  auto vs1 = P.VU.elt(rs1_num, i);
+
+  vd = ((int16_t)vs2 * (uint16_t)vs1) >> sew;
+  break;
+}
+case e16: {
+  auto &vd = P.VU.elt(rd_num, i);
+  auto vs2 = P.VU.elt(rs2_num, i);
+  auto vs1 = P.VU.elt(rs1_num, i);
+
+  vd = ((int32_t)vs2 * (uint32_t)vs1) >> sew;
+  break;
+}
+case e32: {
+  auto &vd = P.VU.elt(rd_num, i);
+  auto vs2 = P.VU.elt(rs2_num, i);
+  auto vs1 = P.VU.elt(rs1_num, i);
+
+  vd = ((int64_t)vs2 * (uint64_t)vs1) >> sew;
+  break;
+}
+default: {
+  auto &vd = P.VU.elt(rd_num, i);
+  auto vs2 = P.VU.elt(rs2_num, i);
+  auto vs1 = P.VU.elt(rs1_num, i);
+
+  vd = ((int128_t)vs2 * (uint128_t)vs1) >> sew;
+  break;
+}
+}
+VI_LOOP_END
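vmulhsu widens each operand before multiplying: the signed vs2 to a signed double-width type and the unsigned vs1 to an unsigned one, so the product of two e8 elements is formed exactly in 16 bits, and `>> sew` keeps the high half. The same shape at fixed width, standalone (illustrative only):

    #include <stdint.h>

    /* High half of a signed x unsigned 8-bit multiply (the e8 case). */
    static int8_t mulhsu8(int8_t s, uint8_t u) {
      int16_t prod = (int16_t)s * (uint16_t)u;  /* exact in 16 bits */
      return (int8_t)(prod >> 8);
    }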
diff --git a/riscv/insns/vmulhsu_vx.h b/riscv/insns/vmulhsu_vx.h
new file mode 100644
index 0000000..d39615a
--- /dev/null
+++ b/riscv/insns/vmulhsu_vx.h
@@ -0,0 +1,37 @@
+// vmulhsu.vx vd, vs2, rs1
+VI_LOOP_BASE
+switch(sew) {
+case e8: {
+  auto &vd = P.VU.elt(rd_num, i);
+  auto vs2 = P.VU.elt(rs2_num, i);
+  uint8_t rs1 = RS1;
+
+  vd = ((int16_t)vs2 * (uint16_t)rs1) >> sew;
+  break;
+}
+case e16: {
+  auto &vd = P.VU.elt(rd_num, i);
+  auto vs2 = P.VU.elt(rs2_num, i);
+  uint16_t rs1 = RS1;
+
+  vd = ((int32_t)vs2 * (uint32_t)rs1) >> sew;
+  break;
+}
+case e32: {
+  auto &vd = P.VU.elt(rd_num, i);
+  auto vs2 = P.VU.elt(rs2_num, i);
+  uint32_t rs1 = RS1;
+
+  vd = ((int64_t)vs2 * (uint64_t)rs1) >> sew;
+  break;
+}
+default: {
+  auto &vd = P.VU.elt(rd_num, i);
+  auto vs2 = P.VU.elt(rs2_num, i);
+  uint64_t rs1 = RS1;
+
+  vd = ((int128_t)vs2 * (uint128_t)rs1) >> sew;
+  break;
+}
+}
+VI_LOOP_END
diff --git a/riscv/insns/vmulhu_vv.h b/riscv/insns/vmulhu_vv.h
new file mode 100644
index 0000000..8e318ed
--- /dev/null
+++ b/riscv/insns/vmulhu_vv.h
@@ -0,0 +1,5 @@
+// vmulhu vd, vs2, vs1
+VI_VV_ULOOP
+({
+  vd = ((uint128_t)vs2 * vs1) >> sew;
+})
diff --git a/riscv/insns/vmulhu_vx.h b/riscv/insns/vmulhu_vx.h
new file mode 100644
index 0000000..672ad32
--- /dev/null
+++ b/riscv/insns/vmulhu_vx.h
@@ -0,0 +1,5 @@
+// vmulhu vd, vs2, rs1
+VI_VX_ULOOP
+({
+  vd = ((uint128_t)vs2 * rs1) >> sew;
+})
diff --git a/riscv/insns/vmv_s_x.h b/riscv/insns/vmv_s_x.h
new file mode 100644
index 0000000..1c4ffb2
--- /dev/null
+++ b/riscv/insns/vmv_s_x.h
@@ -0,0 +1,45 @@
+// vmv_s_x: vd[0] = rs1
+require(insn.v_vm() == 1);
+require(P.VU.vsew == e8 || P.VU.vsew == e16 ||
+        P.VU.vsew == e32 || P.VU.vsew == e64);
+reg_t vl = P.VU.vl;
+
+if (vl > 0) {
+  reg_t rd_num = insn.rd();
+  reg_t sew = P.VU.vsew;
+
+  switch(sew) {
+  case e8:
+    P.VU.elt(rd_num, 0) = RS1;
+    break;
+  case e16:
+    P.VU.elt(rd_num, 0) = RS1;
+    break;
+  case e32:
+    P.VU.elt(rd_num, 0) = RS1;
+    break;
+  default:
+    P.VU.elt(rd_num, 0) = RS1;
+    break;
+  }
+
+  const reg_t max_len = P.VU.VLEN / sew;
+  for (reg_t i = 1; i < max_len; ++i) {
+    switch(sew) {
+    case e8:
+      P.VU.elt(rd_num, i) = 0;
+      break;
+    case e16:
+      P.VU.elt(rd_num, i) = 0;
+      break;
+    case e32:
+      P.VU.elt(rd_num, i) = 0;
+      break;
+    default:
+      P.VU.elt(rd_num, i) = 0;
+      break;
+    }
+  }
+
+  vl = 0;
+}
diff --git a/riscv/insns/vmv_v_i.h b/riscv/insns/vmv_v_i.h
new file mode 100644
index 0000000..31e9877
--- /dev/null
+++ b/riscv/insns/vmv_v_i.h
@@ -0,0 +1,5 @@
+// vmv.v.i vd, simm5
+VI_VVXI_MERGE_LOOP
+({
+  vd = simm5;
+})
diff --git a/riscv/insns/vmv_v_v.h b/riscv/insns/vmv_v_v.h
new file mode 100644
index 0000000..734010b
--- /dev/null
+++ b/riscv/insns/vmv_v_v.h
@@ -0,0 +1,5 @@
+// vmv.v.v vd, vs1
+VI_VVXI_MERGE_LOOP
+({
+  vd = vs1;
+})
diff --git a/riscv/insns/vmv_v_x.h b/riscv/insns/vmv_v_x.h
new file mode 100644
index 0000000..4688b3f
--- /dev/null
+++ b/riscv/insns/vmv_v_x.h
@@ -0,0 +1,5 @@
+// vmv.v.x vd, rs1
+VI_VVXI_MERGE_LOOP
+({
+  vd = rs1;
+})
diff --git a/riscv/insns/vmxnor_mm.h b/riscv/insns/vmxnor_mm.h
new file mode 100644
index 0000000..0736d5b
--- /dev/null
+++ b/riscv/insns/vmxnor_mm.h
@@ -0,0 +1,2 @@
+// vmxnor.mm vd, vs2, vs1
+VI_LOOP_MASK(~(vs2 ^ vs1));
diff --git a/riscv/insns/vmxor_mm.h b/riscv/insns/vmxor_mm.h
new file mode 100644
index 0000000..7f0c576
--- /dev/null
+++ b/riscv/insns/vmxor_mm.h
@@ -0,0 +1,2 @@
+// vmxor.mm vd, vs2, vs1
+VI_LOOP_MASK(vs2 ^ vs1);
diff --git a/riscv/insns/vnclip_vi.h b/riscv/insns/vnclip_vi.h
new file mode 100644
index 0000000..ca27593
--- /dev/null
+++ b/riscv/insns/vnclip_vi.h
@@ -0,0 +1,24 @@
+// vnclip: vd[i] = clip(round(vs2[i] + rnd) >> simm)
+VRM xrm = P.VU.get_vround_mode();
+int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
+int64_t int_min = -(1 << (P.VU.vsew - 1));
+VI_VVXI_LOOP_NARROW
+({
+
+  int64_t result = vs2;
+// rounding
+  INT_ROUNDING(result, xrm, sew);
+
+  result = vsext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32 ? (sew * 2) - 1 : 31));
+
+// saturation
+  if (result < int_min) {
+    result = int_min;
+    P.VU.vxsat = 1;
+  } else if (result > int_max) {
+    result = int_max;
+    P.VU.vxsat = 1;
+  }
+
+  vd = result;
+})
diff --git a/riscv/insns/vnclip_vv.h b/riscv/insns/vnclip_vv.h
new file mode 100644
index 0000000..7bcb4cb
--- /dev/null
+++ b/riscv/insns/vnclip_vv.h
@@ -0,0 +1,30 @@
+// vnclip: vd[i] = clip(round(vs2[i] + rnd) >> vs1[i])
+VRM xrm = P.VU.get_vround_mode();
+int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
+int64_t int_min = -(1 << (P.VU.vsew - 1));
+VI_VVXI_LOOP_NARROW
+({
+
+  int64_t result = vs2;
+// rounding
+  INT_ROUNDING(result, xrm, sew);
+
+// unsigned shift amount from vs1
+  uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1));
+  if (unsigned_shift_amount >= (2 * sew)) {
+    unsigned_shift_amount = 2 * sew - 1;
+  }
+
+  result = (vsext(result, sew * 2)) >> unsigned_shift_amount;
+
+// saturation
+  if (result < int_min) {
+    result = int_min;
+    P.VU.vxsat = 1;
+  } else if (result > int_max) {
+    result = int_max;
+    P.VU.vxsat = 1;
+  }
+
+  vd = result;
+})
diff --git a/riscv/insns/vnclip_vx.h b/riscv/insns/vnclip_vx.h
new file mode 100644
index 0000000..b66e830
--- /dev/null
+++ b/riscv/insns/vnclip_vx.h
@@ -0,0 +1,29 @@
+// vnclip: vd[i] = clip(round(vs2[i] + rnd) >> rs1)
+VRM xrm = P.VU.get_vround_mode();
+int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
+int64_t int_min = -(1 << (P.VU.vsew - 1));
+VI_VVXI_LOOP_NARROW
+({
+
+  int64_t result = vs2;
+// rounding
+  INT_ROUNDING(result, xrm, sew);
+
+// unsigned shift amount from rs1
+  uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1));
+  if (unsigned_shift_amount >= (2 * sew)) {
+    unsigned_shift_amount = 2 * sew - 1;
+  }
+  result = vsext(result, sew * 2) >> unsigned_shift_amount;
+
+// saturation
+  if (result < int_min) {
+    result = int_min;
+    P.VU.vxsat = 1;
+  } else if (result > int_max) {
+    result = int_max;
+    P.VU.vxsat = 1;
+  }
+
+  vd = result;
+})
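The vnclip.* handlers narrow a 2*SEW value to SEW bits: round at the shift amount (INT_ROUNDING), shift, then clamp to [int_min, int_max], setting vxsat on overflow. A standalone sketch of a 32-to-16 narrowing with round-to-nearest-up (one of the vxrm modes; illustrative only):

    #include <stdint.h>

    /* Narrow a 32-bit value to 16 bits with rounding and saturation.
       Widened to 64 bits internally so the rounding add cannot overflow,
       as the patch does by working at double width. */
    static int16_t nclip16(int32_t wide, unsigned shamt, int *sat) {
      int64_t x = wide;
      if (shamt > 0)
        x += (int64_t)1 << (shamt - 1);   /* round-to-nearest-up */
      x >>= shamt;
      if (x > INT16_MAX) { *sat = 1; return INT16_MAX; }
      if (x < INT16_MIN) { *sat = 1; return INT16_MIN; }
      return (int16_t)x;
    }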
diff --git a/riscv/insns/vnclipu_vi.h b/riscv/insns/vnclipu_vi.h
new file mode 100644
index 0000000..61cb015
--- /dev/null
+++ b/riscv/insns/vnclipu_vi.h
@@ -0,0 +1,20 @@
+// vnclipu: vd[i] = clip(round(vs2[i] + rnd) >> simm)
+VRM xrm = P.VU.get_vround_mode();
+uint64_t int_max = ~(-1ll << P.VU.vsew);
+VI_VVXI_LOOP_NARROW
+({
+  uint64_t result = vs2_u;
+  // rounding
+  INT_ROUNDING(result, xrm, sew);
+
+  // unsigned shift by zimm5
+  result = vzext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32 ? (sew * 2) - 1 : 31));
+
+  // saturation
+  if (result & (uint64_t)(-1ll << sew)) {
+    result = int_max;
+    P.VU.vxsat = 1;
+  }
+
+  vd = result;
+})
diff --git a/riscv/insns/vnclipu_vv.h b/riscv/insns/vnclipu_vv.h
new file mode 100644
index 0000000..004f24f
--- /dev/null
+++ b/riscv/insns/vnclipu_vv.h
@@ -0,0 +1,26 @@
+// vnclipu: vd[i] = clip(round(vs2[i] + rnd) >> vs1[i])
+VRM xrm = P.VU.get_vround_mode();
+uint64_t int_max = ~(-1ll << P.VU.vsew);
+VI_VVXI_LOOP_NARROW
+({
+
+  uint64_t result = vs2_u;
+
+// rounding
+  INT_ROUNDING(result, xrm, sew);
+
+// unsigned shift amount from vs1
+  uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1));
+  if (unsigned_shift_amount >= (2 * sew)) {
+    result = 0;
+  } else {
+    result = vzext(result, sew * 2) >> unsigned_shift_amount;
+  }
+// saturation
+  if (result & (uint64_t)(-1ll << sew)) {
+    result = int_max;
+    P.VU.vxsat = 1;
+  }
+
+  vd = result;
+})
diff --git a/riscv/insns/vnclipu_vx.h b/riscv/insns/vnclipu_vx.h
new file mode 100644
index 0000000..0507a2b
--- /dev/null
+++ b/riscv/insns/vnclipu_vx.h
@@ -0,0 +1,26 @@
+// vnclipu: vd[i] = clip(round(vs2[i] + rnd) >> rs1)
+VRM xrm = P.VU.get_vround_mode();
+uint64_t int_max = ~(-1ll << P.VU.vsew);
+VI_VVXI_LOOP_NARROW
+({
+  uint64_t result = vs2_u;
+
+// rounding
+  INT_ROUNDING(result, xrm, sew);
+
+// unsigned shift amount from rs1
+  uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1));
+  if (unsigned_shift_amount >= (2 * sew)) {
+    result = 0;
+  } else {
+    result = vzext(result, sew * 2) >> unsigned_shift_amount;
+  }
+
+// saturation
+  if (result & (uint64_t)(-1ll << sew)) {
+    result = int_max;
+    P.VU.vxsat = 1;
+  }
+
+  vd = result;
+})
diff --git a/riscv/insns/vnmsac_vv.h b/riscv/insns/vnmsac_vv.h
new file mode 100644
index 0000000..7c10f29
--- /dev/null
+++ b/riscv/insns/vnmsac_vv.h
@@ -0,0 +1,5 @@
+// vnmsac.vv: vd[i] = -(vs1[i] * vs2[i]) + vd[i]
+VI_VV_LOOP
+({
+  vd = -(vs1 * vs2) + vd;
+})
diff --git a/riscv/insns/vnmsac_vx.h b/riscv/insns/vnmsac_vx.h
new file mode 100644
index 0000000..44920be
--- /dev/null
+++ b/riscv/insns/vnmsac_vx.h
@@ -0,0 +1,5 @@
+// vnmsac.vx: vd[i] = -(x[rs1] * vs2[i]) + vd[i]
+VI_VX_LOOP
+({
+  vd = -(rs1 * vs2) + vd;
+})
diff --git a/riscv/insns/vnmsub_vv.h b/riscv/insns/vnmsub_vv.h
new file mode 100644
index 0000000..37f8228
--- /dev/null
+++ b/riscv/insns/vnmsub_vv.h
@@ -0,0 +1,5 @@
+// vnmsub.vv: vd[i] = -(vd[i] * vs1[i]) + vs2[i]
+VI_VV_LOOP
+({
+  vd = -(vd * vs1) + vs2;
+})
diff --git a/riscv/insns/vnmsub_vx.h b/riscv/insns/vnmsub_vx.h
new file mode 100644
index 0000000..2e00d22
--- /dev/null
+++ b/riscv/insns/vnmsub_vx.h
@@ -0,0 +1,5 @@
+// vnmsub.vx: vd[i] = -(vd[i] * x[rs1]) + vs2[i]
+VI_VX_LOOP
+({
+  vd = -(vd * rs1) + vs2;
+})
diff --git a/riscv/insns/vnsra_vi.h b/riscv/insns/vnsra_vi.h
new file mode 100644
index 0000000..0502ff1
--- /dev/null
+++ b/riscv/insns/vnsra_vi.h
@@ -0,0 +1,5 @@
+// vnsra.vi vd, vs2, zimm5
+VI_VI_LOOP_NSHIFT
+({
+  vd = vs2 >> (zimm5 & (sew * 2 - 1) & 0x1f);
+})
diff --git a/riscv/insns/vnsra_vv.h b/riscv/insns/vnsra_vv.h
new file mode 100644
index 0000000..555ce3f
--- /dev/null
+++ b/riscv/insns/vnsra_vv.h
@@ -0,0 +1,5 @@
+// vnsra.vv vd, vs2, vs1
+VI_VV_LOOP_NSHIFT
+({
+  vd = vs2 >> (vs1 & (sew * 2 - 1));
+})
diff --git a/riscv/insns/vnsra_vx.h b/riscv/insns/vnsra_vx.h
new file mode 100644
index 0000000..05a55e3
--- /dev/null
+++ b/riscv/insns/vnsra_vx.h
@@ -0,0 +1,5 @@
+// vnsra.vx vd, vs2, rs1
+VI_VX_LOOP_NSHIFT
+({
+  vd = vs2 >> (rs1 & (sew * 2 - 1));
+})
diff --git a/riscv/insns/vnsrl_vi.h b/riscv/insns/vnsrl_vi.h
new file mode 100644
index 0000000..d4dfcf0
--- /dev/null
+++ b/riscv/insns/vnsrl_vi.h
@@ -0,0 +1,5 @@
+// vnsrl.vi vd, vs2, zimm5
+VI_VI_LOOP_NSHIFT
+({
+  vd = vs2_u >> (zimm5 & (sew * 2 - 1));
+})
diff --git a/riscv/insns/vnsrl_vv.h b/riscv/insns/vnsrl_vv.h
new file mode 100644
index 0000000..ab72b84
--- /dev/null
+++ b/riscv/insns/vnsrl_vv.h
@@ -0,0 +1,5 @@
+// vnsrl.vv vd, vs2, vs1
+VI_VV_LOOP_NSHIFT
+({
+  vd = vs2_u >> (vs1 & (sew * 2 - 1));
+})
diff --git a/riscv/insns/vnsrl_vx.h b/riscv/insns/vnsrl_vx.h
new file mode 100644
index 0000000..e149b38
--- /dev/null
+++ b/riscv/insns/vnsrl_vx.h
@@ -0,0 +1,5 @@
+// vnsrl.vx vd, vs2, rs1
+VI_VX_LOOP_NSHIFT
+({
+  vd = vs2_u >> (rs1 & (sew * 2 - 1));
+})
diff --git a/riscv/insns/vor_vi.h b/riscv/insns/vor_vi.h
new file mode 100644
index 0000000..f759607
--- /dev/null
+++ b/riscv/insns/vor_vi.h
@@ -0,0 +1,5 @@
+// vor
+VI_VI_LOOP
+({
+  vd = simm5 | vs2;
+})
diff --git a/riscv/insns/vor_vv.h b/riscv/insns/vor_vv.h
new file mode 100644
index 0000000..0c46066
--- /dev/null
+++ b/riscv/insns/vor_vv.h
@@ -0,0 +1,5 @@
+// vor
+VI_VV_LOOP
+({
+  vd = vs1 | vs2;
+})
diff --git a/riscv/insns/vor_vx.h b/riscv/insns/vor_vx.h
new file mode 100644
index 0000000..01c003a
--- /dev/null
+++ b/riscv/insns/vor_vx.h
@@ -0,0 +1,5 @@
+// vor
+VI_VX_LOOP
+({
+  vd = rs1 | vs2;
+})
diff --git a/riscv/insns/vredand_vs.h b/riscv/insns/vredand_vs.h
new file mode 100644
index 0000000..6c2d908
--- /dev/null
+++ b/riscv/insns/vredand_vs.h
@@ -0,0 +1,5 @@
+// vredand.vs vd, vs2, vs1
+VI_VV_LOOP_REDUCTION
+({
+  vd_0_res &= vs2;
+})
diff --git a/riscv/insns/vredmax_vs.h b/riscv/insns/vredmax_vs.h
new file mode 100644
index 0000000..be2e76a
--- /dev/null
+++ b/riscv/insns/vredmax_vs.h
@@ -0,0 +1,5 @@
+// vredmax.vs vd, vs2, vs1
+VI_VV_LOOP_REDUCTION
+({
+  vd_0_res = (vd_0_res >= vs2) ? vd_0_res : vs2;
+})
diff --git a/riscv/insns/vredmaxu_vs.h b/riscv/insns/vredmaxu_vs.h
new file mode 100644
index 0000000..960f486
--- /dev/null
+++ b/riscv/insns/vredmaxu_vs.h
@@ -0,0 +1,5 @@
+// vredmaxu.vs vd, vs2, vs1
+VI_VV_ULOOP_REDUCTION
+({
+  vd_0_res = (vd_0_res >= vs2) ? vd_0_res : vs2;
+})
diff --git a/riscv/insns/vredmin_vs.h b/riscv/insns/vredmin_vs.h
new file mode 100644
index 0000000..50359b7
--- /dev/null
+++ b/riscv/insns/vredmin_vs.h
@@ -0,0 +1,5 @@
+// vredmin.vs vd, vs2, vs1
+VI_VV_LOOP_REDUCTION
+({
+  vd_0_res = (vd_0_res <= vs2) ? vd_0_res : vs2;
+})
diff --git a/riscv/insns/vredminu_vs.h b/riscv/insns/vredminu_vs.h
new file mode 100644
index 0000000..7082475
--- /dev/null
+++ b/riscv/insns/vredminu_vs.h
@@ -0,0 +1,5 @@
+// vredminu.vs vd, vs2, vs1
+VI_VV_ULOOP_REDUCTION
+({
+  vd_0_res = (vd_0_res <= vs2) ? vd_0_res : vs2;
+})
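The vred*.vs family folds every active element of vs2 into a single accumulator seeded from vs1[0] (held in `vd_0_res` by the VI_VV_LOOP_REDUCTION macro) and writes only element 0 of vd. A scalar equivalent for the sum case (standalone sketch, illustrative only):

    #include <stdint.h>

    /* vredsum.vs: vd[0] = vs1[0] + sum(vs2[0..vl-1]) (unmasked sketch). */
    static int64_t redsum(int64_t seed, const int64_t *vs2, int vl) {
      int64_t acc = seed;   /* vs1[0] */
      for (int i = 0; i < vl; ++i)
        acc += vs2[i];
      return acc;
    }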
diff --git a/riscv/insns/vredor_vs.h b/riscv/insns/vredor_vs.h
new file mode 100644
index 0000000..f7acd9a
--- /dev/null
+++ b/riscv/insns/vredor_vs.h
@@ -0,0 +1,5 @@
+// vredor.vs vd, vs2, vs1
+VI_VV_LOOP_REDUCTION
+({
+  vd_0_res |= vs2;
+})
diff --git a/riscv/insns/vredsum_vs.h b/riscv/insns/vredsum_vs.h
new file mode 100644
index 0000000..c4fefe5
--- /dev/null
+++ b/riscv/insns/vredsum_vs.h
@@ -0,0 +1,5 @@
+// vredsum.vs vd, vs2, vs1
+VI_VV_LOOP_REDUCTION
+({
+  vd_0_res += vs2;
+})
diff --git a/riscv/insns/vredxor_vs.h b/riscv/insns/vredxor_vs.h
new file mode 100644
index 0000000..bb81ad9
--- /dev/null
+++ b/riscv/insns/vredxor_vs.h
@@ -0,0 +1,5 @@
+// vredxor.vs vd, vs2, vs1
+VI_VV_LOOP_REDUCTION
+({
+  vd_0_res ^= vs2;
+})
diff --git a/riscv/insns/vrem_vv.h b/riscv/insns/vrem_vv.h
new file mode 100644
index 0000000..da477f0
--- /dev/null
+++ b/riscv/insns/vrem_vv.h
@@ -0,0 +1,11 @@
+// vrem.vv vd, vs2, vs1
+VI_VV_LOOP
+({
+  if (vs1 == 0)
+    vd = vs2;
+  else if (vs2 == -(1 << (sew - 1)) && vs1 == -1)
+    vd = 0;
+  else {
+    vd = vs2 % vs1;
+  }
+})
diff --git a/riscv/insns/vrem_vx.h b/riscv/insns/vrem_vx.h
new file mode 100644
index 0000000..f068842
--- /dev/null
+++ b/riscv/insns/vrem_vx.h
@@ -0,0 +1,10 @@
+// vrem.vx vd, vs2, rs1
+VI_VX_LOOP
+({
+  if (rs1 == 0)
+    vd = vs2;
+  else if (vs2 == -(1 << (sew - 1)) && rs1 == -1)
+    vd = 0;
+  else
+    vd = vs2 % rs1;
+})
diff --git a/riscv/insns/vremu_vv.h b/riscv/insns/vremu_vv.h
new file mode 100644
index 0000000..7e15072
--- /dev/null
+++ b/riscv/insns/vremu_vv.h
@@ -0,0 +1,8 @@
+// vremu.vv vd, vs2, vs1
+VI_VV_ULOOP
+({
+  if (vs1 == 0)
+    vd = vs2;
+  else
+    vd = vs2 % vs1;
+})
diff --git a/riscv/insns/vremu_vx.h b/riscv/insns/vremu_vx.h
new file mode 100644
index 0000000..a87a820
--- /dev/null
+++ b/riscv/insns/vremu_vx.h
@@ -0,0 +1,8 @@
+// vremu.vx vd, vs2, rs1
+VI_VX_ULOOP
+({
+  if (rs1 == 0)
+    vd = vs2;
+  else
+    vd = vs2 % rs1;
+})
diff --git a/riscv/insns/vrgather_vi.h b/riscv/insns/vrgather_vi.h
new file mode 100644
index 0000000..a9be102
--- /dev/null
+++ b/riscv/insns/vrgather_vi.h
@@ -0,0 +1,29 @@
+// vrgather.vi vd, vs2, zimm5, vm # vd[i] = (zimm5 >= VLMAX) ? 0 : vs2[zimm5];
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs2_num = insn.rs2();
+reg_t zimm5 = insn.v_zimm5();
+for (reg_t i = P.VU.vstart; i < vl; ++i) {
+  VI_LOOP_ELEMENT_SKIP();
+
+  switch (sew) {
+  case e8:
+    P.VU.elt(rd_num, i) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, zimm5);
+    break;
+  case e16:
+    P.VU.elt(rd_num, i) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, zimm5);
+    break;
+  case e32:
+    P.VU.elt(rd_num, i) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, zimm5);
+    break;
+  default:
+    P.VU.elt(rd_num, i) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, zimm5);
+    break;
+  }
+}
+
+VI_TAIL_ZERO(1);
+P.VU.vstart = 0;
diff --git a/riscv/insns/vrgather_vv.h b/riscv/insns/vrgather_vv.h
new file mode 100644
index 0000000..da8dc81
--- /dev/null
+++ b/riscv/insns/vrgather_vv.h
@@ -0,0 +1,39 @@
+// vrgather.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]];
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+for (reg_t i = P.VU.vstart; i < vl; ++i) {
+  VI_LOOP_ELEMENT_SKIP();
+  VI_CHECK_VREG_OVERLAP(rd_num, rs1_num);
+  VI_CHECK_VREG_OVERLAP(rd_num, rs2_num);
+  switch (sew) {
+  case e8: {
+    auto vs1 = P.VU.elt(rs1_num, i);
+    //if (i > 255) continue;
+    P.VU.elt(rd_num, i) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1);
+    break;
+  }
+  case e16: {
+    auto vs1 = P.VU.elt(rs1_num, i);
+    P.VU.elt(rd_num, i) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1);
+    break;
+  }
+  case e32: {
+    auto vs1 = P.VU.elt(rs1_num, i);
+    P.VU.elt(rd_num, i) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1);
+    break;
+  }
+  default: {
+    auto vs1 = P.VU.elt(rs1_num, i);
+    P.VU.elt(rd_num, i) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1);
+    break;
+  }
+  }
+}
+
+VI_TAIL_ZERO(1);
+P.VU.vstart = 0;
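vrgather.vv is a full permutation: each destination element fetches vs2[vs1[i]], and an out-of-range index (>= VLMAX) yields 0. A scalar sketch (standalone, illustrative only):

    #include <stddef.h>
    #include <stdint.h>

    /* vd[i] = (idx[i] >= vlmax) ? 0 : src[idx[i]] (unmasked sketch). */
    static void rgather(uint32_t *vd, const uint32_t *src,
                        const uint32_t *idx, size_t vl, size_t vlmax) {
      for (size_t i = 0; i < vl; ++i)
        vd[i] = (idx[i] >= vlmax) ? 0 : src[idx[i]];
    }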
diff --git a/riscv/insns/vrgather_vx.h b/riscv/insns/vrgather_vx.h
new file mode 100644
index 0000000..d6c2e38
--- /dev/null
+++ b/riscv/insns/vrgather_vx.h
@@ -0,0 +1,30 @@
+// vrgather.vx vd, vs2, rs1, vm # vd[i] = (rs1 >= VLMAX) ? 0 : vs2[rs1];
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+reg_t rs1 = RS1;
+for (reg_t i = P.VU.vstart; i < vl; ++i) {
+  VI_LOOP_ELEMENT_SKIP();
+
+  switch (sew) {
+  case e8:
+    P.VU.elt(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, rs1);
+    break;
+  case e16:
+    P.VU.elt(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, rs1);
+    break;
+  case e32:
+    P.VU.elt(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, rs1);
+    break;
+  default:
+    P.VU.elt(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, rs1);
+    break;
+  }
+}
+
+VI_TAIL_ZERO(1);
+P.VU.vstart = 0;
diff --git a/riscv/insns/vrsub_vi.h b/riscv/insns/vrsub_vi.h
new file mode 100644
index 0000000..198c33f
--- /dev/null
+++ b/riscv/insns/vrsub_vi.h
@@ -0,0 +1,5 @@
+// vrsub.vi vd, vs2, imm, vm # vd[i] = imm - vs2[i]
+VI_VI_LOOP
+({
+  vd = simm5 - vs2;
+})
diff --git a/riscv/insns/vrsub_vx.h b/riscv/insns/vrsub_vx.h
new file mode 100644
index 0000000..bfd6259
--- /dev/null
+++ b/riscv/insns/vrsub_vx.h
@@ -0,0 +1,5 @@
+// vrsub.vx vd, vs2, rs1, vm # vd[i] = rs1 - vs2[i]
+VI_VX_LOOP
+({
+  vd = rs1 - vs2;
+})
diff --git a/riscv/insns/vsadd_vi.h b/riscv/insns/vsadd_vi.h
new file mode 100644
index 0000000..de2cb83
--- /dev/null
+++ b/riscv/insns/vsadd_vi.h
@@ -0,0 +1,27 @@
+// vsadd.vi vd, vs2, simm5
+VI_LOOP_BASE
+bool sat = false;
+switch(sew) {
+case e8: {
+  VI_PARAMS(e8);
+  vd = sat_add(vs2, vsext(simm5, sew), sat);
+  break;
+}
+case e16: {
+  VI_PARAMS(e16);
+  vd = sat_add(vs2, vsext(simm5, sew), sat);
+  break;
+}
+case e32: {
+  VI_PARAMS(e32);
+  vd = sat_add(vs2, vsext(simm5, sew), sat);
+  break;
+}
+default: {
+  VI_PARAMS(e64);
+  vd = sat_add(vs2, vsext(simm5, sew), sat);
+  break;
+}
+}
+P.VU.vxsat |= sat;
+VI_LOOP_END
diff --git a/riscv/insns/vsadd_vv.h b/riscv/insns/vsadd_vv.h
new file mode 100644
index 0000000..2152bab
--- /dev/null
+++ b/riscv/insns/vsadd_vv.h
@@ -0,0 +1,28 @@
+// vsadd.vv vd, vs2, vs1
+VI_LOOP_BASE
+bool sat = false;
+switch(sew) {
+case e8: {
+  VV_PARAMS(e8);
+  vd = sat_add(vs2, vs1, sat);
+  break;
+}
+case e16: {
+  VV_PARAMS(e16);
+  vd = sat_add(vs2, vs1, sat);
+  break;
+}
+case e32: {
+  VV_PARAMS(e32);
+  vd = sat_add(vs2, vs1, sat);
+  break;
+}
+default: {
+  VV_PARAMS(e64);
+  vd = sat_add(vs2, vs1, sat);
+  break;
+}
+}
+P.VU.vxsat |= sat;
+VI_LOOP_END
+
diff --git a/riscv/insns/vsadd_vx.h b/riscv/insns/vsadd_vx.h
new file mode 100644
index 0000000..781e9e8
--- /dev/null
+++ b/riscv/insns/vsadd_vx.h
@@ -0,0 +1,27 @@
+// vsadd.vx vd, vs2, rs1
+VI_LOOP_BASE
+bool sat = false;
+switch(sew) {
+case e8: {
+  VX_PARAMS(e8);
+  vd = sat_add(vs2, rs1, sat);
+  break;
+}
+case e16: {
+  VX_PARAMS(e16);
+  vd = sat_add(vs2, rs1, sat);
+  break;
+}
+case e32: {
+  VX_PARAMS(e32);
+  vd = sat_add(vs2, rs1, sat);
+  break;
+}
+default: {
+  VX_PARAMS(e64);
+  vd = sat_add(vs2, rs1, sat);
+  break;
+}
+}
+P.VU.vxsat |= sat;
+VI_LOOP_END
diff --git a/riscv/insns/vsaddu_vi.h b/riscv/insns/vsaddu_vi.h
new file mode 100644
index 0000000..9d376cc
--- /dev/null
+++ b/riscv/insns/vsaddu_vi.h
@@ -0,0 +1,11 @@
+// vsaddu vd, vs2, zimm5
+VI_VI_ULOOP
+({
+  bool sat = false;
+  vd = vs2 + simm5;
+
+  sat = vd < vs2;
+  vd |= -(vd < vs2);
+
+  P.VU.vxsat |= sat;
+})
diff --git a/riscv/insns/vsaddu_vv.h b/riscv/insns/vsaddu_vv.h
new file mode 100644
index 0000000..e5d7025
--- /dev/null
+++ b/riscv/insns/vsaddu_vv.h
@@ -0,0 +1,11 @@
+// vsaddu vd, vs2, vs1
+VI_VV_ULOOP
+({
+  bool sat = false;
+  vd = vs2 + vs1;
+
+  sat = vd < vs2;
+  vd |= -(vd < vs2);
+
+  P.VU.vxsat |= sat;
+})
diff --git a/riscv/insns/vsaddu_vx.h b/riscv/insns/vsaddu_vx.h
new file mode 100644
index 0000000..46ec29d
--- /dev/null
+++ b/riscv/insns/vsaddu_vx.h
@@ -0,0 +1,12 @@
+// vsaddu vd, vs2, rs1
+VI_VX_ULOOP
+({
+  bool sat = false;
+  vd = vs2 + rs1;
+
+  sat = vd < vs2;
+  vd |= -(vd < vs2);
+
+  P.VU.vxsat |= sat;
+
+})
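vsaddu.* detects unsigned overflow with the classic wrapped-result test: after `vd = vs2 + rs1`, `vd < vs2` holds exactly when the addition carried out, and `vd |= -(vd < vs2)` forces the result to all-ones in that case. The same logic standalone (illustrative sketch):

    #include <stdbool.h>
    #include <stdint.h>

    /* Unsigned saturating add; *sat is set on overflow. */
    static uint32_t sat_addu32(uint32_t a, uint32_t b, bool *sat) {
      uint32_t r = a + b;
      if (r < a) {          /* wrapped, so the add overflowed */
        *sat = true;
        r = UINT32_MAX;     /* same effect as r |= -(uint32_t)1 */
      }
      return r;
    }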
diff --git a/riscv/insns/vsbc_vvm.h b/riscv/insns/vsbc_vvm.h
new file mode 100644
index 0000000..4cd58ba
--- /dev/null
+++ b/riscv/insns/vsbc_vvm.h
@@ -0,0 +1,11 @@
+// vsbc.vvm vd, vs2, vs1
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_VV_LOOP
+({
+  auto &v0 = P.VU.elt(0, midx);
+  const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+  uint64_t carry = (v0 >> mpos) & 0x1;
+
+  uint128_t res = (op_mask & vs1) - (op_mask & vs2) - carry;
+  vd = res;
+})
diff --git a/riscv/insns/vsbc_vxm.h b/riscv/insns/vsbc_vxm.h
new file mode 100644
index 0000000..12551b8
--- /dev/null
+++ b/riscv/insns/vsbc_vxm.h
@@ -0,0 +1,11 @@
+// vsbc.vxm vd, vs2, rs1
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_VX_ULOOP
+({
+  auto &v0 = P.VU.elt(0, midx);
+  const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+  uint64_t carry = (v0 >> mpos) & 0x1;
+
+  uint128_t res = (op_mask & rs1) - (op_mask & vs2) - carry;
+  vd = res;
+})
diff --git a/riscv/insns/vslide1down_vx.h b/riscv/insns/vslide1down_vx.h
new file mode 100644
index 0000000..0069df7
--- /dev/null
+++ b/riscv/insns/vslide1down_vx.h
@@ -0,0 +1,42 @@
+// vslide1down.vx vd, vs2, rs1
+VI_LOOP_BASE
+if (i != vl - 1) {
+  switch (sew) {
+  case e8: {
+    VI_XI_SLIDEDOWN_PARAMS(e8, 1);
+    vd = vs2;
+  }
+  break;
+  case e16: {
+    VI_XI_SLIDEDOWN_PARAMS(e16, 1);
+    vd = vs2;
+  }
+  break;
+  case e32: {
+    VI_XI_SLIDEDOWN_PARAMS(e32, 1);
+    vd = vs2;
+  }
+  break;
+  default: {
+    VI_XI_SLIDEDOWN_PARAMS(e64, 1);
+    vd = vs2;
+  }
+  break;
+  }
+} else {
+  switch (sew) {
+  case e8:
+    P.VU.elt(rd_num, vl - 1) = RS1;
+    break;
+  case e16:
+    P.VU.elt(rd_num, vl - 1) = RS1;
+    break;
+  case e32:
+    P.VU.elt(rd_num, vl - 1) = RS1;
+    break;
+  default:
+    P.VU.elt(rd_num, vl - 1) = RS1;
+    break;
+  }
+}
+VI_LOOP_END
diff --git a/riscv/insns/vslide1up_vx.h b/riscv/insns/vslide1up_vx.h
new file mode 100644
index 0000000..50cc503
--- /dev/null
+++ b/riscv/insns/vslide1up_vx.h
@@ -0,0 +1,32 @@
+// vslide1up.vx vd, vs2, rs1
+if (insn.v_vm() == 0)
+  require(insn.rd() != 0);
+
+VI_CHECK_SS
+VI_LOOP_BASE
+if (i != 0) {
+  if (sew == e8) {
+    VI_XI_SLIDEUP_PARAMS(e8, 1);
+    vd = vs2;
+  } else if (sew == e16) {
+    VI_XI_SLIDEUP_PARAMS(e16, 1);
+    vd = vs2;
+  } else if (sew == e32) {
+    VI_XI_SLIDEUP_PARAMS(e32, 1);
+    vd = vs2;
+  } else if (sew == e64) {
+    VI_XI_SLIDEUP_PARAMS(e64, 1);
+    vd = vs2;
+  }
+} else {
+  if (sew == e8) {
+    P.VU.elt(rd_num, 0) = RS1;
+  } else if (sew == e16) {
+    P.VU.elt(rd_num, 0) = RS1;
+  } else if (sew == e32) {
+    P.VU.elt(rd_num, 0) = RS1;
+  } else if (sew == e64) {
+    P.VU.elt(rd_num, 0) = RS1;
+  }
+}
+VI_LOOP_END
diff --git a/riscv/insns/vslidedown_vi.h b/riscv/insns/vslidedown_vi.h
new file mode 100644
index 0000000..c21c5f2
--- /dev/null
+++ b/riscv/insns/vslidedown_vi.h
@@ -0,0 +1,33 @@
+// vslidedown.vi vd, vs2, zimm5
+VI_LOOP_BASE
+const reg_t sh = insn.v_zimm5();
+bool is_valid = (i + sh) < P.VU.vlmax;
+reg_t offset = 0;
+
+if (is_valid) {
+  offset = sh;
+}
+
+switch (sew) {
+case e8: {
+  VI_XI_SLIDEDOWN_PARAMS(e8, offset);
+  vd = is_valid ? vs2 : 0;
+}
+break;
+case e16: {
+  VI_XI_SLIDEDOWN_PARAMS(e16, offset);
+  vd = is_valid ? vs2 : 0;
+}
+break;
+case e32: {
+  VI_XI_SLIDEDOWN_PARAMS(e32, offset);
+  vd = is_valid ? vs2 : 0;
+}
+break;
+default: {
+  VI_XI_SLIDEDOWN_PARAMS(e64, offset);
+  vd = is_valid ? vs2 : 0;
+}
+break;
+}
+VI_LOOP_END
diff --git a/riscv/insns/vslidedown_vx.h b/riscv/insns/vslidedown_vx.h
new file mode 100644
index 0000000..251740c
--- /dev/null
+++ b/riscv/insns/vslidedown_vx.h
@@ -0,0 +1,33 @@
+// vslidedown.vx vd, vs2, rs1
+VI_LOOP_BASE
+
+reg_t offset = RS1 == (reg_t)-1 ? ((RS1 & (P.VU.vlmax * 2 - 1)) + i) : RS1;
+bool is_valid = offset < P.VU.vlmax;
+
+if (!is_valid) {
+  offset = 0;
+}
+
+switch (sew) {
+case e8: {
+  VI_XI_SLIDEDOWN_PARAMS(e8, offset);
+  vd = is_valid ? vs2 : 0;
+}
+break;
+case e16: {
+  VI_XI_SLIDEDOWN_PARAMS(e16, offset);
+  vd = is_valid ? vs2 : 0;
+}
+break;
+case e32: {
+  VI_XI_SLIDEDOWN_PARAMS(e32, offset);
+  vd = is_valid ? vs2 : 0;
+}
+break;
+default: {
+  VI_XI_SLIDEDOWN_PARAMS(e64, offset);
+  vd = is_valid ? vs2 : 0;
+}
+break;
+}
+VI_LOOP_END
diff --git a/riscv/insns/vslideup_vi.h b/riscv/insns/vslideup_vi.h
new file mode 100644
index 0000000..4135b20
--- /dev/null
+++ b/riscv/insns/vslideup_vi.h
@@ -0,0 +1,33 @@
+// vslideup.vi vd, vs2, zimm5
+if (insn.v_vm() == 0)
+  require(insn.rd() != 0);
+
+VI_CHECK_SS
+const reg_t offset = insn.v_zimm5();
+VI_LOOP_BASE
+if (P.VU.vstart < offset && i < offset)
+  continue;
+
+switch (sew) {
+case e8: {
+  VI_XI_SLIDEUP_PARAMS(e8, offset);
+  vd = vs2;
+}
+break;
+case e16: {
+  VI_XI_SLIDEUP_PARAMS(e16, offset);
+  vd = vs2;
+}
+break;
+case e32: {
+  VI_XI_SLIDEUP_PARAMS(e32, offset);
+  vd = vs2;
+}
+break;
+default: {
+  VI_XI_SLIDEUP_PARAMS(e64, offset);
+  vd = vs2;
+}
+break;
+}
+VI_LOOP_END
diff --git a/riscv/insns/vslideup_vx.h b/riscv/insns/vslideup_vx.h
new file mode 100644
index 0000000..bf73fcd
--- /dev/null
+++ b/riscv/insns/vslideup_vx.h
@@ -0,0 +1,29 @@
+// vslideup.vx vd, vs2, rs1
+const reg_t offset = RS1;
+VI_LOOP_BASE
+if (P.VU.vstart < offset && i < offset)
+  continue;
+
+switch (sew) {
+case e8: {
+  VI_XI_SLIDEUP_PARAMS(e8, offset);
+  vd = vs2;
+}
+break;
+case e16: {
+  VI_XI_SLIDEUP_PARAMS(e16, offset);
+  vd = vs2;
+}
+break;
+case e32: {
+  VI_XI_SLIDEUP_PARAMS(e32, offset);
+  vd = vs2;
+}
+break;
+default: {
+  VI_XI_SLIDEUP_PARAMS(e64, offset);
+  vd = vs2;
+}
+break;
+}
+VI_LOOP_END
diff --git a/riscv/insns/vsll_vi.h b/riscv/insns/vsll_vi.h
new file mode 100644
index 0000000..be46506
--- /dev/null
+++ b/riscv/insns/vsll_vi.h
@@ -0,0 +1,5 @@
+// vsll.vi vd, vs2, zimm5
+VI_VI_LOOP
+({
+  vd = vs2 << (simm5 & (sew - 1) & 0x1f);
+})
diff --git a/riscv/insns/vsll_vv.h b/riscv/insns/vsll_vv.h
new file mode 100644
index 0000000..ce82022
--- /dev/null
+++ b/riscv/insns/vsll_vv.h
@@ -0,0 +1,5 @@
+// vsll
+VI_VV_LOOP
+({
+  vd = vs2 << (vs1 & (sew - 1));
+})
diff --git a/riscv/insns/vsll_vx.h b/riscv/insns/vsll_vx.h
new file mode 100644
index 0000000..823510b
--- /dev/null
+++ b/riscv/insns/vsll_vx.h
@@ -0,0 +1,5 @@
+// vsll
+VI_VX_LOOP
+({
+  vd = vs2 << (rs1 & (sew - 1));
+})
diff --git a/riscv/insns/vsmul_vv.h b/riscv/insns/vsmul_vv.h
new file mode 100644
index 0000000..70d22ae
--- /dev/null
+++ b/riscv/insns/vsmul_vv.h
@@ -0,0 +1,33 @@
+// vsmul: Signed saturating and rounding fractional multiply
+VRM xrm = P.VU.get_vround_mode();
+uint64_t int_max = (1ul << (P.VU.vsew - 1)) - 1;
+uint64_t int_min = -(1 << (P.VU.vsew - 1));
+uint64_t sign_mask = ((1ul << (P.VU.vsew - 1)));
+
+VI_VV_ULOOP
+({
+  uint64_t vs1_sign;
+  uint64_t vs2_sign;
+  uint64_t result_sign;
+
+  vs1_sign = vs1 & sign_mask;
+  vs2_sign = vs2 & sign_mask;
+  bool overflow = vs1 == vs2 && vs1 == int_min;
+
+  uint128_t result = (uint128_t)vs1 * (uint128_t)vs2;
+  result &= ((uint128_t)1llu << ((sew * 2) - 2)) - 1;
+  result_sign = (vs1_sign ^ vs2_sign) & sign_mask;
+  // rounding
+  INT_ROUNDING(result, xrm, sew - 1);
+  // unsigned shifting
+  result = result >> (sew - 1);
+
+  // saturation
+  if (overflow) {
+    result = int_max;
+    P.VU.vxsat = 1;
+  } else {
+    result |= result_sign;
+  }
+  vd = result;
+})
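vsmul multiplies two SEW-bit fixed-point values, drops the redundant duplicated sign bit by keeping only 2*SEW-2 product bits, rounds, and shifts right by SEW-1, so a Q(SEW-1)-format product stays in Q(SEW-1); the only overflowing input pair is int_min * int_min. A 16-bit Q15 sketch with round-to-nearest-up (one vxrm mode; standalone and illustrative only):

    #include <stdint.h>

    /* Q15 saturating rounding multiply, the scalar shape of vsmul. */
    static int16_t q15_mul(int16_t a, int16_t b, int *sat) {
      if (a == INT16_MIN && b == INT16_MIN) {  /* only overflowing pair */
        *sat = 1;
        return INT16_MAX;
      }
      int32_t prod = (int32_t)a * b;   /* Q30 */
      prod += 1 << 14;                 /* round at bit 14 */
      return (int16_t)(prod >> 15);    /* back to Q15 */
    }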
diff --git a/riscv/insns/vsmul_vx.h b/riscv/insns/vsmul_vx.h
new file mode 100644
index 0000000..ef3751b
--- /dev/null
+++ b/riscv/insns/vsmul_vx.h
@@ -0,0 +1,34 @@
+// vsmul.vx vd, vs2, rs1
+VRM xrm = P.VU.get_vround_mode();
+uint128_t int_max = (1ul << (P.VU.vsew - 1)) - 1;
+uint128_t int_min = -(1 << (P.VU.vsew - 1));
+uint128_t sign_mask = ((1ul << (P.VU.vsew - 1)));
+
+VI_VX_ULOOP
+({
+  uint128_t rs1_sign;
+  uint128_t vs2_sign;
+  uint128_t result_sign;
+
+  rs1_sign = rs1 & sign_mask;
+  vs2_sign = vs2 & sign_mask;
+  bool overflow = rs1 == vs2 && rs1 == int_min;
+
+  uint128_t result = (uint128_t)rs1 * (uint128_t)vs2;
+  result &= ((uint128_t)1llu << ((sew * 2) - 2)) - 1;
+  result_sign = (rs1_sign ^ vs2_sign) & sign_mask;
+  // rounding
+  INT_ROUNDING(result, xrm, sew - 1);
+
+  // unsigned shifting
+  result = result >> (sew - 1);
+
+  // saturation
+  if (overflow) {
+    result = int_max;
+    P.VU.vxsat = 1;
+  } else {
+    result |= result_sign;
+  }
+  vd = result;
+})
diff --git a/riscv/insns/vsra_vi.h b/riscv/insns/vsra_vi.h
new file mode 100644
index 0000000..5c58927
--- /dev/null
+++ b/riscv/insns/vsra_vi.h
@@ -0,0 +1,5 @@
+// vsra.vi vd, vs2, zimm5
+VI_VI_LOOP
+({
+  vd = vs2 >> (simm5 & (sew - 1) & 0x1f);
+})
diff --git a/riscv/insns/vsra_vv.h b/riscv/insns/vsra_vv.h
new file mode 100644
index 0000000..8889af9
--- /dev/null
+++ b/riscv/insns/vsra_vv.h
@@ -0,0 +1,5 @@
+// vsra.vv vd, vs2, vs1
+VI_VV_LOOP
+({
+  vd = vs2 >> (vs1 & (sew - 1));
+})
diff --git a/riscv/insns/vsra_vx.h b/riscv/insns/vsra_vx.h
new file mode 100644
index 0000000..c1b0c10
--- /dev/null
+++ b/riscv/insns/vsra_vx.h
@@ -0,0 +1,5 @@
+// vsra.vx vd, vs2, rs1
+VI_VX_LOOP
+({
+  vd = vs2 >> (rs1 & (sew - 1));
+})
diff --git a/riscv/insns/vsrl_vi.h b/riscv/insns/vsrl_vi.h
new file mode 100644
index 0000000..5006854
--- /dev/null
+++ b/riscv/insns/vsrl_vi.h
@@ -0,0 +1,5 @@
+// vsrl.vi vd, vs2, zimm5
+VI_VI_ULOOP
+({
+  vd = vs2 >> (simm5 & (sew - 1) & 0x1f);
+})
diff --git a/riscv/insns/vsrl_vv.h b/riscv/insns/vsrl_vv.h
new file mode 100644
index 0000000..6376af3
--- /dev/null
+++ b/riscv/insns/vsrl_vv.h
@@ -0,0 +1,5 @@
+// vsrl.vv vd, vs2, vs1
+VI_VV_ULOOP
+({
+  vd = vs2 >> (vs1 & (sew - 1));
+})
diff --git a/riscv/insns/vsrl_vx.h b/riscv/insns/vsrl_vx.h
new file mode 100644
index 0000000..a4f899c
--- /dev/null
+++ b/riscv/insns/vsrl_vx.h
@@ -0,0 +1,5 @@
+// vsrl.vx vd, vs2, rs1
+VI_VX_ULOOP
+({
+  vd = vs2 >> (rs1 & (sew - 1));
+})
diff --git a/riscv/insns/vssra_vi.h b/riscv/insns/vssra_vi.h
new file mode 100644
index 0000000..ef2390c
--- /dev/null
+++ b/riscv/insns/vssra_vi.h
@@ -0,0 +1,8 @@
+// vssra.vi vd, vs2, simm5
+VRM xrm = P.VU.get_vround_mode();
+VI_VI_LOOP
+({
+  int sh = simm5 & (sew - 1) & 0x1f;
+  INT_ROUNDING(vs2, xrm, sh);
+  vd = vs2 >> sh;
+})
diff --git a/riscv/insns/vssra_vv.h b/riscv/insns/vssra_vv.h
new file mode 100644
index 0000000..e697b52
--- /dev/null
+++ b/riscv/insns/vssra_vv.h
@@ -0,0 +1,9 @@
+// vssra.vv vd, vs2, vs1
+VRM xrm = P.VU.get_vround_mode();
+VI_VV_LOOP
+({
+  int sh = vs1 & (sew - 1);
+
+  INT_ROUNDING(vs2, xrm, sh);
+  vd = vs2 >> sh;
+})
diff --git a/riscv/insns/vssra_vx.h b/riscv/insns/vssra_vx.h
new file mode 100644
index 0000000..8d7ad20
--- /dev/null
+++ b/riscv/insns/vssra_vx.h
@@ -0,0 +1,9 @@
+// vssra.vx vd, vs2, rs1
+VRM xrm = P.VU.get_vround_mode();
+VI_VX_LOOP
+({
+  int sh = rs1 & (sew - 1);
+
+  INT_ROUNDING(vs2, xrm, sh);
+  vd = vs2 >> sh;
+})
diff --git a/riscv/insns/vssrl_vi.h b/riscv/insns/vssrl_vi.h
new file mode 100644
index 0000000..8a10df0
--- /dev/null
+++ b/riscv/insns/vssrl_vi.h
@@ -0,0 +1,9 @@
+// vssrl.vi vd, vs2, simm5
+VRM xrm = P.VU.get_vround_mode();
+VI_VI_ULOOP
+({
+  int sh = simm5 & (sew - 1) & 0x1f;
+
+  INT_ROUNDING(vs2, xrm, sh);
+  vd = vs2 >> sh;
+})
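vssra/vssrl are plain shifts with the same INT_ROUNDING pre-step used by the narrowing clips: the bits about to be shifted out are folded into a rounding increment chosen by vxrm before the shift happens. The round-to-nearest-up case, standalone (illustrative sketch):

    #include <stdint.h>

    /* Rounding arithmetic shift right (the round-to-nearest-up vxrm mode). */
    static int32_t ssra32(int32_t v, unsigned sh) {
      int64_t x = v;                    /* widen so the +rnd cannot overflow */
      if (sh > 0)
        x += (int64_t)1 << (sh - 1);    /* add half an LSB of the result */
      return (int32_t)(x >> sh);
    }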
diff --git a/riscv/insns/vssrl_vv.h b/riscv/insns/vssrl_vv.h
new file mode 100644
index 0000000..f40cd90
--- /dev/null
+++ b/riscv/insns/vssrl_vv.h
@@ -0,0 +1,9 @@
+// vssrl.vv vd, vs2, vs1
+VRM xrm = P.VU.get_vround_mode();
+VI_VV_ULOOP
+({
+  int sh = vs1 & (sew - 1);
+
+  INT_ROUNDING(vs2, xrm, sh);
+  vd = vs2 >> sh;
+})
diff --git a/riscv/insns/vssrl_vx.h b/riscv/insns/vssrl_vx.h
new file mode 100644
index 0000000..5da3f75
--- /dev/null
+++ b/riscv/insns/vssrl_vx.h
@@ -0,0 +1,9 @@
+// vssrl.vx vd, vs2, rs1
+VRM xrm = P.VU.get_vround_mode();
+VI_VX_ULOOP
+({
+  int sh = rs1 & (sew - 1);
+
+  INT_ROUNDING(vs2, xrm, sh);
+  vd = vs2 >> sh;
+})
diff --git a/riscv/insns/vssub_vv.h b/riscv/insns/vssub_vv.h
new file mode 100644
index 0000000..fd3ee21
--- /dev/null
+++ b/riscv/insns/vssub_vv.h
@@ -0,0 +1,28 @@
+// vssub.vv vd, vs2, vs1
+VI_LOOP_BASE
+bool sat = false;
+
+switch (sew) {
+case e8: {
+  VV_PARAMS(e8);
+  vd = sat_sub(vs2, vs1, sat);
+  break;
+}
+case e16: {
+  VV_PARAMS(e16);
+  vd = sat_sub(vs2, vs1, sat);
+  break;
+}
+case e32: {
+  VV_PARAMS(e32);
+  vd = sat_sub(vs2, vs1, sat);
+  break;
+}
+default: {
+  VV_PARAMS(e64);
+  vd = sat_sub(vs2, vs1, sat);
+  break;
+}
+}
+P.VU.vxsat |= sat;
+VI_LOOP_END
diff --git a/riscv/insns/vssub_vx.h b/riscv/insns/vssub_vx.h
new file mode 100644
index 0000000..5c5c781
--- /dev/null
+++ b/riscv/insns/vssub_vx.h
@@ -0,0 +1,28 @@
+// vssub.vx vd, vs2, rs1
+VI_LOOP_BASE
+bool sat = false;
+
+switch (sew) {
+case e8: {
+  VX_PARAMS(e8);
+  vd = sat_sub(vs2, rs1, sat);
+  break;
+}
+case e16: {
+  VX_PARAMS(e16);
+  vd = sat_sub(vs2, rs1, sat);
+  break;
+}
+case e32: {
+  VX_PARAMS(e32);
+  vd = sat_sub(vs2, rs1, sat);
+  break;
+}
+default: {
+  VX_PARAMS(e64);
+  vd = sat_sub(vs2, rs1, sat);
+  break;
+}
+}
+P.VU.vxsat |= sat;
+VI_LOOP_END
diff --git a/riscv/insns/vssubu_vv.h b/riscv/insns/vssubu_vv.h
new file mode 100644
index 0000000..c5c74fe
--- /dev/null
+++ b/riscv/insns/vssubu_vv.h
@@ -0,0 +1,29 @@
+// vssubu.vv vd, vs2, vs1
+VI_LOOP_BASE
+bool sat = false;
+
+switch (sew) {
+case e8: {
+  VV_U_PARAMS(e8);
+  vd = sat_subu(vs2, vs1, sat);
+  break;
+}
+case e16: {
+  VV_U_PARAMS(e16);
+  vd = sat_subu(vs2, vs1, sat);
+  break;
+}
+case e32: {
+  VV_U_PARAMS(e32);
+  vd = sat_subu(vs2, vs1, sat);
+  break;
+}
+default: {
+  VV_U_PARAMS(e64);
+  vd = sat_subu(vs2, vs1, sat);
+  break;
+}
+}
+P.VU.vxsat |= sat;
+
+VI_LOOP_END
diff --git a/riscv/insns/vssubu_vx.h b/riscv/insns/vssubu_vx.h
new file mode 100644
index 0000000..12cfdbb
--- /dev/null
+++ b/riscv/insns/vssubu_vx.h
@@ -0,0 +1,28 @@
+// vssubu.vx vd, vs2, rs1
+VI_LOOP_BASE
+bool sat = false;
+
+switch (sew) {
+case e8: {
+  VX_U_PARAMS(e8);
+  vd = sat_subu(vs2, rs1, sat);
+  break;
+}
+case e16: {
+  VX_U_PARAMS(e16);
+  vd = sat_subu(vs2, rs1, sat);
+  break;
+}
+case e32: {
+  VX_U_PARAMS(e32);
+  vd = sat_subu(vs2, rs1, sat);
+  break;
+}
+default: {
+  VX_U_PARAMS(e64);
+  vd = sat_subu(vs2, rs1, sat);
+  break;
+}
+}
+P.VU.vxsat |= sat;
+VI_LOOP_END
diff --git a/riscv/insns/vsub_vv.h b/riscv/insns/vsub_vv.h
new file mode 100644
index 0000000..7d119d5
--- /dev/null
+++ b/riscv/insns/vsub_vv.h
@@ -0,0 +1,5 @@
+// vsub
+VI_VV_LOOP
+({
+  vd = vs2 - vs1;
+})
diff --git a/riscv/insns/vsub_vx.h b/riscv/insns/vsub_vx.h
new file mode 100644
index 0000000..e075b42
--- /dev/null
+++ b/riscv/insns/vsub_vx.h
@@ -0,0 +1,5 @@
+// vsub.vx: vd[i] = vs2[i] - x[rs1]
+VI_VX_LOOP
+({
+  vd = vs2 - rs1;
+})
diff --git a/riscv/insns/vwadd_vv.h b/riscv/insns/vwadd_vv.h
new file mode 100644
index 0000000..df4a135
--- /dev/null
+++ b/riscv/insns/vwadd_vv.h
@@ -0,0 +1,6 @@
+// vwadd.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, +, +, int);
+})
diff --git a/riscv/insns/vwadd_vx.h b/riscv/insns/vwadd_vx.h
new file mode 100644
index 0000000..c226389
--- /dev/null
+++ b/riscv/insns/vwadd_vx.h
@@ -0,0 +1,6 @@
+// vwadd.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, +, +, int);
+})
diff --git a/riscv/insns/vwadd_wv.h b/riscv/insns/vwadd_wv.h
new file mode 100644
index 0000000..54d2ba4
--- /dev/null
+++ b/riscv/insns/vwadd_wv.h
@@ -0,0 +1,6 @@
+// vwadd.wv vd, vs2, vs1
+VI_CHECK_DDS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_WVX_OP(vs1, +, int);
+})
diff --git a/riscv/insns/vwadd_wx.h b/riscv/insns/vwadd_wx.h
new file mode 100644
index 0000000..bb4cee5
--- /dev/null
+++ b/riscv/insns/vwadd_wx.h
@@ -0,0 +1,6 @@
+// vwadd.wx vd, vs2, rs1
+VI_CHECK_DDS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_WVX_OP(rs1, +, int);
+})
diff --git a/riscv/insns/vwaddu_vv.h b/riscv/insns/vwaddu_vv.h
new file mode 100644
index 0000000..286ebc8
--- /dev/null
+++ b/riscv/insns/vwaddu_vv.h
@@ -0,0 +1,6 @@
+// vwaddu.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, +, +, uint);
+})
diff --git a/riscv/insns/vwaddu_vx.h b/riscv/insns/vwaddu_vx.h
new file mode 100644
index 0000000..61cddfc
--- /dev/null
+++ b/riscv/insns/vwaddu_vx.h
@@ -0,0 +1,6 @@
+// vwaddu.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, +, +, uint);
+})
diff --git a/riscv/insns/vwaddu_wv.h b/riscv/insns/vwaddu_wv.h
new file mode 100644
index 0000000..fee8136
--- /dev/null
+++ b/riscv/insns/vwaddu_wv.h
@@ -0,0 +1,6 @@
+// vwaddu.wv vd, vs2, vs1
+VI_CHECK_DDS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_WVX_OP(vs1, +, uint);
+})
diff --git a/riscv/insns/vwaddu_wx.h b/riscv/insns/vwaddu_wx.h
new file mode 100644
index 0000000..0073ac3
--- /dev/null
+++ b/riscv/insns/vwaddu_wx.h
@@ -0,0 +1,6 @@
+// vwaddu.wx vd, vs2, rs1
+VI_CHECK_DDS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_WVX_OP(rs1, +, uint);
+})
diff --git a/riscv/insns/vwmacc_vv.h b/riscv/insns/vwmacc_vv.h
new file mode 100644
index 0000000..7208c6d
--- /dev/null
+++ b/riscv/insns/vwmacc_vv.h
@@ -0,0 +1,6 @@
+// vwmacc.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, vs1, vd_w, *, +, int);
+})
diff --git a/riscv/insns/vwmacc_vx.h b/riscv/insns/vwmacc_vx.h
new file mode 100644
index 0000000..5ae597a
--- /dev/null
+++ b/riscv/insns/vwmacc_vx.h
@@ -0,0 +1,6 @@
+// vwmacc.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, rs1, vd_w, *, +, int);
+})
diff --git a/riscv/insns/vwmaccsu_vv.h b/riscv/insns/vwmaccsu_vv.h
new file mode 100644
index 0000000..3aa43ef
--- /dev/null
+++ b/riscv/insns/vwmaccsu_vv.h
@@ -0,0 +1,6 @@
+// vwmaccsu.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN_MIX(vs2, vs1, vd_w, *, +, int, uint, int);
+})
diff --git a/riscv/insns/vwmaccsu_vx.h b/riscv/insns/vwmaccsu_vx.h
new file mode 100644
index 0000000..e00a21d
--- /dev/null
+++ b/riscv/insns/vwmaccsu_vx.h
@@ -0,0 +1,6 @@
+// vwmaccsu.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN_MIX(vs2, rs1, vd_w, *, +, int, uint, int);
+})
diff --git a/riscv/insns/vwmaccu_vv.h b/riscv/insns/vwmaccu_vv.h
new file mode 100644
index 0000000..2cbdaa3
--- /dev/null
+++ b/riscv/insns/vwmaccu_vv.h
@@ -0,0 +1,6 @@
+// vwmaccu.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, vs1, vd_w, *, +, uint);
+})
diff --git a/riscv/insns/vwmaccu_vx.h b/riscv/insns/vwmaccu_vx.h
new file mode 100644
index 0000000..533297f
--- /dev/null
+++ b/riscv/insns/vwmaccu_vx.h
@@ -0,0 +1,6 @@
+// vwmaccu.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, rs1, vd_w, *, +, uint);
+})
diff --git a/riscv/insns/vwmaccus_vx.h b/riscv/insns/vwmaccus_vx.h
new file mode 100644
index 0000000..5310f0e
--- /dev/null
+++ b/riscv/insns/vwmaccus_vx.h
@@ -0,0 +1,6 @@
+// vwmaccus.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN_MIX(vs2, rs1, vd_w, *, +, int, int, uint);
+})
diff --git a/riscv/insns/vwmul_vv.h b/riscv/insns/vwmul_vv.h
new file mode 100644
index 0000000..2197edb
--- /dev/null
+++ b/riscv/insns/vwmul_vv.h
@@ -0,0 +1,6 @@
+// vwmul.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, *, +, int);
+})
diff --git a/riscv/insns/vwmul_vx.h b/riscv/insns/vwmul_vx.h
new file mode 100644
index 0000000..bc1422d
--- /dev/null
+++ b/riscv/insns/vwmul_vx.h
@@ -0,0 +1,6 @@
+// vwmul.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, *, +, int);
+})
diff --git a/riscv/insns/vwmulsu_vv.h b/riscv/insns/vwmulsu_vv.h
new file mode 100644
index 0000000..9786adb
--- /dev/null
+++ b/riscv/insns/vwmulsu_vv.h
@@ -0,0 +1,16 @@
+// vwmulsu.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+  switch(P.VU.vsew) {
+  case e8:
+    P.VU.elt(rd_num, i) = (int16_t)(int8_t)vs2 * (int16_t)(uint8_t)vs1;
+    break;
+  case e16:
+    P.VU.elt(rd_num, i) = (int32_t)(int16_t)vs2 * (int32_t)(uint16_t)vs1;
+    break;
+  default:
+    P.VU.elt(rd_num, i) = (int64_t)(int32_t)vs2 * (int64_t)(uint32_t)vs1;
+    break;
+  }
+})
diff --git a/riscv/insns/vwmulsu_vx.h b/riscv/insns/vwmulsu_vx.h
new file mode 100644
index 0000000..feb1fd1
--- /dev/null
+++ b/riscv/insns/vwmulsu_vx.h
@@ -0,0 +1,16 @@
+// vwmulsu.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+  switch(P.VU.vsew) {
+  case e8:
+    P.VU.elt(rd_num, i) = (int16_t)(int8_t)vs2 * (int16_t)(uint8_t)rs1;
+    break;
+  case e16:
+    P.VU.elt(rd_num, i) = (int32_t)(int16_t)vs2 * (int32_t)(uint16_t)rs1;
+    break;
+  default:
+    P.VU.elt(rd_num, i) = (int64_t)(int32_t)vs2 * (int64_t)(uint32_t)rs1;
+    break;
+  }
+})
diff --git a/riscv/insns/vwmulu_vv.h b/riscv/insns/vwmulu_vv.h
new file mode 100644
index 0000000..8ddbb4b
--- /dev/null
+++ b/riscv/insns/vwmulu_vv.h
@@ -0,0 +1,6 @@
+// vwmulu.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, *, +, uint);
+})
diff --git a/riscv/insns/vwmulu_vx.h b/riscv/insns/vwmulu_vx.h
new file mode 100644
index 0000000..1ce77ee
--- /dev/null
+++ b/riscv/insns/vwmulu_vx.h
@@ -0,0 +1,6 @@
+// vwmulu.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, *, +, uint);
+})
diff --git a/riscv/insns/vwredsum_vs.h b/riscv/insns/vwredsum_vs.h
new file mode 100644
index 0000000..c7a87db
--- /dev/null
+++ b/riscv/insns/vwredsum_vs.h
@@ -0,0 +1,5 @@
+// vwredsum.vs vd, vs2, vs1
+VI_VV_LOOP_WIDE_REDUCTION
+({
+  vd_0_res += vs2;
+})
diff --git a/riscv/insns/vwredsumu_vs.h b/riscv/insns/vwredsumu_vs.h
new file mode 100644
index 0000000..889a77d
--- /dev/null
+++ b/riscv/insns/vwredsumu_vs.h
@@ -0,0 +1,5 @@
+// vwredsumu.vs vd, vs2, vs1
+VI_VV_ULOOP_WIDE_REDUCTION
+({
+  vd_0_res += vs2;
+})
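The vwmacc* group accumulates a double-width product into a double-width vd; the su/us variants widen one operand with sign extension and the other with zero extension, as vwmulsu spells out explicitly with its per-SEW casts. A fixed-width standalone sketch of the signed-times-unsigned case (illustrative only):

    #include <stdint.h>

    /* Widening multiply-accumulate, signed x unsigned (e8 -> e16 shape). */
    static int16_t wmaccsu8(int16_t acc, int8_t s, uint8_t u) {
      int16_t prod = (int16_t)s * (uint16_t)u;  /* product exact in 16 bits */
      return (int16_t)(acc + prod);
    }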
diff --git a/riscv/insns/vwmulu_vv.h b/riscv/insns/vwmulu_vv.h
new file mode 100644
index 0000000..8ddbb4b
--- /dev/null
+++ b/riscv/insns/vwmulu_vv.h
@@ -0,0 +1,6 @@
+// vwmulu.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, *, +, uint);
+})
diff --git a/riscv/insns/vwmulu_vx.h b/riscv/insns/vwmulu_vx.h
new file mode 100644
index 0000000..1ce77ee
--- /dev/null
+++ b/riscv/insns/vwmulu_vx.h
@@ -0,0 +1,6 @@
+// vwmulu.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, *, +, uint);
+})
diff --git a/riscv/insns/vwredsum_vs.h b/riscv/insns/vwredsum_vs.h
new file mode 100644
index 0000000..c7a87db
--- /dev/null
+++ b/riscv/insns/vwredsum_vs.h
@@ -0,0 +1,5 @@
+// vwredsum.vs vd, vs2, vs1
+VI_VV_LOOP_WIDE_REDUCTION
+({
+  vd_0_res += vs2;
+})
diff --git a/riscv/insns/vwredsumu_vs.h b/riscv/insns/vwredsumu_vs.h
new file mode 100644
index 0000000..889a77d
--- /dev/null
+++ b/riscv/insns/vwredsumu_vs.h
@@ -0,0 +1,5 @@
+// vwredsumu.vs vd, vs2, vs1
+VI_VV_ULOOP_WIDE_REDUCTION
+({
+  vd_0_res += vs2;
+})
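In the widening reductions just above, vd_0_res is the accumulator the macro provides: it is 2*SEW wide, seeded from vs1[0], and written back to vd[0] after the loop, so `vd_0_res += vs2;` is the entire per-element step. A scalar sketch under those assumptions (names hypothetical; masking and tail handling omitted):

    #include <cstddef>
    #include <cstdint>

    // Hypothetical model of vwredsum.vs at SEW=8: 8-bit elements
    // accumulate into a 16-bit scalar, giving 2*SEW headroom
    // (a sum past 16 bits still wraps, as in hardware).
    int16_t wredsum_e8(int16_t vs1_0, const int8_t* vs2, size_t vl) {
      int16_t acc = vs1_0;           // vd_0_res, seeded from vs1[0]
      for (size_t i = 0; i < vl; i++)
        acc += (int16_t)vs2[i];      // each element sign-extended
      return acc;                    // stored to vd[0]
    }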
diff --git a/riscv/insns/vwsmacc_vv.h b/riscv/insns/vwsmacc_vv.h
new file mode 100644
index 0000000..86d588d
--- /dev/null
+++ b/riscv/insns/vwsmacc_vv.h
@@ -0,0 +1,2 @@
+// vwsmacc.vv vd, vs2, vs1
+VI_VVX_LOOP_WIDE_SSMA(vs1);
diff --git a/riscv/insns/vwsmacc_vx.h b/riscv/insns/vwsmacc_vx.h
new file mode 100644
index 0000000..f0f04a3
--- /dev/null
+++ b/riscv/insns/vwsmacc_vx.h
@@ -0,0 +1,2 @@
+// vwsmacc.vx vd, vs2, rs1
+VI_VVX_LOOP_WIDE_SSMA(rs1);
diff --git a/riscv/insns/vwsmaccsu_vv.h b/riscv/insns/vwsmaccsu_vv.h
new file mode 100644
index 0000000..cf1aa1e
--- /dev/null
+++ b/riscv/insns/vwsmaccsu_vv.h
@@ -0,0 +1,2 @@
+// vwsmaccsu.vv vd, vs2, vs1
+VI_VVX_LOOP_WIDE_SU_SSMA(vs1);
diff --git a/riscv/insns/vwsmaccsu_vx.h b/riscv/insns/vwsmaccsu_vx.h
new file mode 100644
index 0000000..681c309
--- /dev/null
+++ b/riscv/insns/vwsmaccsu_vx.h
@@ -0,0 +1,2 @@
+// vwsmaccsu.vx vd, vs2, rs1
+VI_VVX_LOOP_WIDE_SU_SSMA(rs1);
diff --git a/riscv/insns/vwsmaccu_vv.h b/riscv/insns/vwsmaccu_vv.h
new file mode 100644
index 0000000..e873d93
--- /dev/null
+++ b/riscv/insns/vwsmaccu_vv.h
@@ -0,0 +1,2 @@
+// vwsmaccu.vv vd, vs2, vs1
+VI_VVX_LOOP_WIDE_USSMA(vs1);
diff --git a/riscv/insns/vwsmaccu_vx.h b/riscv/insns/vwsmaccu_vx.h
new file mode 100644
index 0000000..7318fa7
--- /dev/null
+++ b/riscv/insns/vwsmaccu_vx.h
@@ -0,0 +1,2 @@
+// vwsmaccu.vx vd, vs2, rs1
+VI_VVX_LOOP_WIDE_USSMA(rs1);
diff --git a/riscv/insns/vwsmaccus_vx.h b/riscv/insns/vwsmaccus_vx.h
new file mode 100644
index 0000000..da1a1c8
--- /dev/null
+++ b/riscv/insns/vwsmaccus_vx.h
@@ -0,0 +1,2 @@
+// vwsmaccus.vx vd, vs2, rs1
+VI_VVX_LOOP_WIDE_US_SSMA(rs1);
diff --git a/riscv/insns/vwsub_vv.h b/riscv/insns/vwsub_vv.h
new file mode 100644
index 0000000..99f9348
--- /dev/null
+++ b/riscv/insns/vwsub_vv.h
@@ -0,0 +1,6 @@
+// vwsub.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, -, +, int);
+})
diff --git a/riscv/insns/vwsub_vx.h b/riscv/insns/vwsub_vx.h
new file mode 100644
index 0000000..affdf62
--- /dev/null
+++ b/riscv/insns/vwsub_vx.h
@@ -0,0 +1,6 @@
+// vwsub.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, -, +, int);
+})
diff --git a/riscv/insns/vwsub_wv.h b/riscv/insns/vwsub_wv.h
new file mode 100644
index 0000000..10db730
--- /dev/null
+++ b/riscv/insns/vwsub_wv.h
@@ -0,0 +1,6 @@
+// vwsub.wv vd, vs2, vs1
+VI_CHECK_DDS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_WVX_OP(vs1, -, int);
+})
diff --git a/riscv/insns/vwsub_wx.h b/riscv/insns/vwsub_wx.h
new file mode 100644
index 0000000..f72341b
--- /dev/null
+++ b/riscv/insns/vwsub_wx.h
@@ -0,0 +1,6 @@
+// vwsub.wx vd, vs2, rs1
+VI_CHECK_DDS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_WVX_OP(rs1, -, int);
+})
diff --git a/riscv/insns/vwsubu_vv.h b/riscv/insns/vwsubu_vv.h
new file mode 100644
index 0000000..cf68adb
--- /dev/null
+++ b/riscv/insns/vwsubu_vv.h
@@ -0,0 +1,6 @@
+// vwsubu.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, -, +, uint);
+})
diff --git a/riscv/insns/vwsubu_vx.h b/riscv/insns/vwsubu_vx.h
new file mode 100644
index 0000000..3e972dd
--- /dev/null
+++ b/riscv/insns/vwsubu_vx.h
@@ -0,0 +1,6 @@
+// vwsubu.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, -, +, uint);
+})
diff --git a/riscv/insns/vwsubu_wv.h b/riscv/insns/vwsubu_wv.h
new file mode 100644
index 0000000..3687c3d
--- /dev/null
+++ b/riscv/insns/vwsubu_wv.h
@@ -0,0 +1,6 @@
+// vwsubu.wv vd, vs2, vs1
+VI_CHECK_DDS(true);
+VI_VV_LOOP_WIDEN
+({
+  VI_WIDE_WVX_OP(vs1, -, uint);
+})
diff --git a/riscv/insns/vwsubu_wx.h b/riscv/insns/vwsubu_wx.h
new file mode 100644
index 0000000..c7f20ed
--- /dev/null
+++ b/riscv/insns/vwsubu_wx.h
@@ -0,0 +1,6 @@
+// vwsubu.wx vd, vs2, rs1
+VI_CHECK_DDS(false);
+VI_VX_LOOP_WIDEN
+({
+  VI_WIDE_WVX_OP(rs1, -, uint);
+})
diff --git a/riscv/insns/vxor_vi.h b/riscv/insns/vxor_vi.h
new file mode 100644
index 0000000..b2dcf94
--- /dev/null
+++ b/riscv/insns/vxor_vi.h
@@ -0,0 +1,5 @@
+// vxor.vi vd, vs2, simm5
+VI_VI_LOOP
+({
+  vd = simm5 ^ vs2;
+})
diff --git a/riscv/insns/vxor_vv.h b/riscv/insns/vxor_vv.h
new file mode 100644
index 0000000..c37b6ab
--- /dev/null
+++ b/riscv/insns/vxor_vv.h
@@ -0,0 +1,5 @@
+// vxor.vv vd, vs2, vs1
+VI_VV_LOOP
+({
+  vd = vs1 ^ vs2;
+})
diff --git a/riscv/insns/vxor_vx.h b/riscv/insns/vxor_vx.h
new file mode 100644
index 0000000..8021e0e
--- /dev/null
+++ b/riscv/insns/vxor_vx.h
@@ -0,0 +1,5 @@
+// vxor.vx vd, vs2, rs1
+VI_VX_LOOP
+({
+  vd = rs1 ^ vs2;
+})
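The riscv.mk.in hunk below registers every new opcode with the build; spike turns each name in these lists into a generated per-instruction source file that pulls in the matching riscv/insns/<name>.h. Roughly, going by the shape of insn_template.cc (the exact body and the MATCH_* name are assumptions here, shown only to make the list's purpose concrete):

    // Approximate shape of the wrapper generated for one list
    // entry, here vadd_vv; the build substitutes NAME and OPCODE.
    #include "insn_template.h"

    reg_t rv64_vadd_vv(processor_t* p, insn_t insn, reg_t pc)
    {
      int xlen = 64;
      reg_t npc = sext_xlen(pc + insn_length(MATCH_VADD_VV));
      #include "insns/vadd_vv.h"   // the body added by this patch
      trace_opcode(p, MATCH_VADD_VV, insn);
      return npc;
    }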
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in
index 4d538c8..e8c7f04 100644
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@@ -291,11 +291,217 @@ riscv_insn_ext_q = \
   fsqrt_q \
   fsub_q \
 
+riscv_insn_ext_v_alu_int = \
+  vaadd_vi \
+  vaadd_vv \
+  vaadd_vx \
+  vadc_vim \
+  vadc_vvm \
+  vadc_vxm \
+  vadd_vi \
+  vadd_vv \
+  vadd_vx \
+  vand_vi \
+  vand_vv \
+  vand_vx \
+  vasub_vv \
+  vasub_vx \
+  vcompress_vm \
+  vdiv_vv \
+  vdiv_vx \
+  vdivu_vv \
+  vdivu_vx \
+  vdot_vv \
+  vdotu_vv \
+  vext_x_v \
+  vid_v \
+  viota_m \
+  vmacc_vv \
+  vmacc_vx \
+  vmadc_vim \
+  vmadc_vvm \
+  vmadc_vxm \
+  vmadd_vv \
+  vmadd_vx \
+  vmand_mm \
+  vmandnot_mm \
+  vmax_vv \
+  vmax_vx \
+  vmaxu_vv \
+  vmaxu_vx \
+  vmerge_vim \
+  vmerge_vvm \
+  vmerge_vxm \
+  vmfirst_m \
+  vmin_vv \
+  vmin_vx \
+  vminu_vv \
+  vminu_vx \
+  vmnand_mm \
+  vmnor_mm \
+  vmor_mm \
+  vmornot_mm \
+  vmpopc_m \
+  vmsbc_vvm \
+  vmsbc_vxm \
+  vmsbf_m \
+  vmseq_vi \
+  vmseq_vv \
+  vmseq_vx \
+  vmsgt_vi \
+  vmsgt_vx \
+  vmsgtu_vi \
+  vmsgtu_vx \
+  vmsif_m \
+  vmsle_vi \
+  vmsle_vv \
+  vmsle_vx \
+  vmsleu_vi \
+  vmsleu_vv \
+  vmsleu_vx \
+  vmslt_vv \
+  vmslt_vx \
+  vmsltu_vv \
+  vmsltu_vx \
+  vmsne_vi \
+  vmsne_vv \
+  vmsne_vx \
+  vmsof_m \
+  vmul_vv \
+  vmul_vx \
+  vmulh_vv \
+  vmulh_vx \
+  vmulhsu_vv \
+  vmulhsu_vx \
+  vmulhu_vv \
+  vmulhu_vx \
+  vmv_s_x \
+  vmv_v_i \
+  vmv_v_v \
+  vmv_v_x \
+  vmxnor_mm \
+  vmxor_mm \
+  vnclip_vi \
+  vnclip_vv \
+  vnclip_vx \
+  vnclipu_vi \
+  vnclipu_vv \
+  vnclipu_vx \
+  vnmsac_vv \
+  vnmsac_vx \
+  vnmsub_vv \
+  vnmsub_vx \
+  vnsra_vi \
+  vnsra_vv \
+  vnsra_vx \
+  vnsrl_vi \
+  vnsrl_vv \
+  vnsrl_vx \
+  vor_vi \
+  vor_vv \
+  vor_vx \
+  vredand_vs \
+  vredmax_vs \
+  vredmaxu_vs \
+  vredmin_vs \
+  vredminu_vs \
+  vredor_vs \
+  vredsum_vs \
+  vredxor_vs \
+  vrem_vv \
+  vrem_vx \
+  vremu_vv \
+  vremu_vx \
+  vrgather_vi \
+  vrgather_vv \
+  vrgather_vx \
+  vrsub_vi \
+  vrsub_vx \
+  vsadd_vi \
+  vsadd_vv \
+  vsadd_vx \
+  vsaddu_vi \
+  vsaddu_vv \
+  vsaddu_vx \
+  vsbc_vvm \
+  vsbc_vxm \
+  vslide1down_vx \
+  vslide1up_vx \
+  vslidedown_vi \
+  vslidedown_vx \
+  vslideup_vi \
+  vslideup_vx \
+  vsll_vi \
+  vsll_vv \
+  vsll_vx \
+  vsmul_vv \
+  vsmul_vx \
+  vsra_vi \
+  vsra_vv \
+  vsra_vx \
+  vsrl_vi \
+  vsrl_vv \
+  vsrl_vx \
+  vssra_vi \
+  vssra_vv \
+  vssra_vx \
+  vssrl_vi \
+  vssrl_vv \
+  vssrl_vx \
+  vssub_vv \
+  vssub_vx \
+  vssubu_vv \
+  vssubu_vx \
+  vsub_vv \
+  vsub_vx \
+  vwadd_vv \
+  vwadd_vx \
+  vwadd_wv \
+  vwadd_wx \
+  vwaddu_vv \
+  vwaddu_vx \
+  vwaddu_wv \
+  vwaddu_wx \
+  vwmacc_vv \
+  vwmacc_vx \
+  vwmaccsu_vv \
+  vwmaccsu_vx \
+  vwmaccu_vv \
+  vwmaccu_vx \
+  vwmaccus_vx \
+  vwmul_vv \
+  vwmul_vx \
+  vwmulsu_vv \
+  vwmulsu_vx \
+  vwmulu_vv \
+  vwmulu_vx \
+  vwredsum_vs \
+  vwredsumu_vs \
+  vwsmacc_vv \
+  vwsmacc_vx \
+  vwsmaccsu_vv \
+  vwsmaccsu_vx \
+  vwsmaccu_vv \
+  vwsmaccu_vx \
+  vwsmaccus_vx \
+  vwsub_vv \
+  vwsub_vx \
+  vwsub_wv \
+  vwsub_wx \
+  vwsubu_vv \
+  vwsubu_vx \
+  vwsubu_wv \
+  vwsubu_wx \
+  vxor_vi \
+  vxor_vv \
+  vxor_vx \
+
 riscv_insn_ext_v_ctrl = \
   vsetvli \
   vsetvl \
 
 riscv_insn_ext_v = \
+  $(riscv_insn_ext_v_alu_int) \
   $(riscv_insn_ext_v_ctrl) \
 
 riscv_insn_priv = \
-- 
cgit v1.1