aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChunPing Chung <cpchung@pllab.cs.nthu.edu.tw>2021-05-11 14:15:45 +0800
committerGitHub <noreply@github.com>2021-05-10 23:15:45 -0700
commit0981d396bca516a2b17db4cf744b8463b210c4cc (patch)
tree8a10e57b5fc3696ef2acf6c9ba7a31ed9f8d371b
parent71acc77173587155e4f2e62e3372abab889803aa (diff)
downloadriscv-isa-sim-0981d396bca516a2b17db4cf744b8463b210c4cc.zip
riscv-isa-sim-0981d396bca516a2b17db4cf744b8463b210c4cc.tar.gz
riscv-isa-sim-0981d396bca516a2b17db4cf744b8463b210c4cc.tar.bz2
Support RISC-V p-ext-proposal v0.9.2 (#637)
* rvp: add 8/16 bits add/sub simd instructions * rvp: add 8/16 bits shift simd instructions * rvp: add 8/16 bits compare simd instructions * rvp: add 8/16 bits multiply simd instructions * rvp: add 8/16 bits misc simd instructions * rvp: add 8 bits unpacking simd instructions * rvp: update suppported extention and add restriction * rvp: update encoding.h and riscv.mk.in * rvp: disasm: add simd instruction support * rvp: update readme for p-ext simd instructions * rvp: fix rvp support version * rvp: update encoding.h generated from riscv-opcode p-ext branch * rvp: rename some macro argument * rvp: add pk[bb,bt,tt,tb][16,32] instructions * rvp: add kadd32, [su]maqa[_su] instructions * rvp: fix missing initial value of pd * rvp: add msw 32x32 multiply & add instructions * rvp: change to use extract64 * rvp: add msw 32x16 multiply & add instructions * rvp: fix some style * rvp: change reduction marcro definition * rvp: add signed 16x32 add/subtract instructions * rvp: use stdint to replace hardcode max/minimum * rvp: refactor some p-ext macro code * rvp: add partial simd miscellaneous instructions * rvp: add signed 16 x 64 add/subtract Instructions * rvp: add 64-bit add & sub instructions * rvp: add 32-bit mul with 64-bit add/sub instructions * rvp: add 16-bit mul with 64-bit add/sub instructions * rvp: disasm: add 64 bit profile instruction support * rvp: add Q15 saturation instructions * rvp: fix kmar64/kmsr64 saturation behavior * rvp: add 32-bit computation instructions * rvp: add rdov/clrov and fix khm16 behavior of setting OV flag * rvp: add non simd miscellaneous instructions * rvp: add Q31 saturation instructions * rvp: disasm: add non-simd instruction support * rvp: add 32 bits add/sub simd instructions * rvp: fix left shift saturation bug * rvp: add 32 bits shift simd instructions * rvp: add rv64 only Q15 simd instructions * rvp: add rv64 only 32-bit multiply instructions * rvp: add rv64 only 32-bit miscellaneous instructions * rvp: add rv64 only 32-bit mul & 
add instructions * rvp: add rv64 only 32-bit parallel mul & add instructions * rvp: add rv64 only non-simd 32-bit shift instructions * rvp: disasm: remove redundant tab * rvp: disasm: add rv64 only instructions support * rvp: change ov csr to ucode to match v0.5.2 spec * rvp: update readme for p-ext 0.5.2 * rvp: update to p-ext v0.9.1 * rvp: update to p-ext v0.9.2 * rvp: update readme for p-ext 0.9.2 * rvp: fix macro for PKxx16 & PKxx32 commands. * rvp: fix missing for in PKxxdd macro * Sign-extension for p-ext insns * * Fixed uclipNN insns while sh >> 64 is an UB. * Added missing OV * Added missing sext_xlen * Remove unused macroses * Sign extension for RD_PAIR macro * rvp: remove lost tab Co-authored-by: Mark Fedorov <mark.fedorov@cloudbear.ru>
-rw-r--r--README.md1
-rw-r--r--disasm/disasm.cc343
-rw-r--r--riscv/arith.h21
-rw-r--r--riscv/decode.h509
-rw-r--r--riscv/encoding.h981
-rw-r--r--riscv/insns/add16.h3
-rw-r--r--riscv/insns/add32.h4
-rw-r--r--riscv/insns/add64.h3
-rw-r--r--riscv/insns/add8.h3
-rw-r--r--riscv/insns/ave.h5
-rw-r--r--riscv/insns/bitrev.h12
-rw-r--r--riscv/insns/bitrevi.h12
-rw-r--r--riscv/insns/bpick.h6
-rw-r--r--riscv/insns/clo16.h11
-rw-r--r--riscv/insns/clo32.h12
-rw-r--r--riscv/insns/clo8.h10
-rw-r--r--riscv/insns/clrs16.h12
-rw-r--r--riscv/insns/clrs32.h13
-rw-r--r--riscv/insns/clrs8.h11
-rw-r--r--riscv/insns/clz16.h10
-rw-r--r--riscv/insns/clz32.h11
-rw-r--r--riscv/insns/clz8.h9
-rw-r--r--riscv/insns/cmpeq16.h3
-rw-r--r--riscv/insns/cmpeq8.h3
-rw-r--r--riscv/insns/cras16.h5
-rw-r--r--riscv/insns/cras32.h6
-rw-r--r--riscv/insns/crsa16.h5
-rw-r--r--riscv/insns/crsa32.h6
-rw-r--r--riscv/insns/insb.h3
-rw-r--r--riscv/insns/kabs16.h9
-rw-r--r--riscv/insns/kabs32.h9
-rw-r--r--riscv/insns/kabs8.h9
-rw-r--r--riscv/insns/kabsw.h9
-rw-r--r--riscv/insns/kadd16.h5
-rw-r--r--riscv/insns/kadd32.h6
-rw-r--r--riscv/insns/kadd64.h5
-rw-r--r--riscv/insns/kadd8.h5
-rw-r--r--riscv/insns/kaddh.h4
-rw-r--r--riscv/insns/kaddw.h4
-rw-r--r--riscv/insns/kcras16.h9
-rw-r--r--riscv/insns/kcras32.h10
-rw-r--r--riscv/insns/kcrsa16.h9
-rw-r--r--riscv/insns/kcrsa32.h10
-rw-r--r--riscv/insns/kdmabb.h16
-rw-r--r--riscv/insns/kdmabb16.h17
-rw-r--r--riscv/insns/kdmabt.h16
-rw-r--r--riscv/insns/kdmabt16.h17
-rw-r--r--riscv/insns/kdmatt.h16
-rw-r--r--riscv/insns/kdmatt16.h17
-rw-r--r--riscv/insns/kdmbb.h12
-rw-r--r--riscv/insns/kdmbb16.h12
-rw-r--r--riscv/insns/kdmbt.h12
-rw-r--r--riscv/insns/kdmbt16.h12
-rw-r--r--riscv/insns/kdmtt.h12
-rw-r--r--riscv/insns/kdmtt16.h12
-rw-r--r--riscv/insns/khm16.h8
-rw-r--r--riscv/insns/khm8.h8
-rw-r--r--riscv/insns/khmbb.h12
-rw-r--r--riscv/insns/khmbb16.h13
-rw-r--r--riscv/insns/khmbt.h12
-rw-r--r--riscv/insns/khmbt16.h13
-rw-r--r--riscv/insns/khmtt.h12
-rw-r--r--riscv/insns/khmtt16.h13
-rw-r--r--riscv/insns/khmx16.h8
-rw-r--r--riscv/insns/khmx8.h8
-rw-r--r--riscv/insns/kmabb.h6
-rw-r--r--riscv/insns/kmabb32.h7
-rw-r--r--riscv/insns/kmabt.h6
-rw-r--r--riscv/insns/kmabt32.h7
-rw-r--r--riscv/insns/kmada.h3
-rw-r--r--riscv/insns/kmadrs.h6
-rw-r--r--riscv/insns/kmadrs32.h9
-rw-r--r--riscv/insns/kmads.h6
-rw-r--r--riscv/insns/kmads32.h9
-rw-r--r--riscv/insns/kmar64.h15
-rw-r--r--riscv/insns/kmatt.h6
-rw-r--r--riscv/insns/kmatt32.h7
-rw-r--r--riscv/insns/kmaxda.h3
-rw-r--r--riscv/insns/kmaxda32.h9
-rw-r--r--riscv/insns/kmaxds.h6
-rw-r--r--riscv/insns/kmaxds32.h9
-rw-r--r--riscv/insns/kmda.h3
-rw-r--r--riscv/insns/kmda32.h9
-rw-r--r--riscv/insns/kmmac.h6
-rw-r--r--riscv/insns/kmmac_u.h7
-rw-r--r--riscv/insns/kmmawb.h6
-rw-r--r--riscv/insns/kmmawb2.h14
-rw-r--r--riscv/insns/kmmawb2_u.h14
-rw-r--r--riscv/insns/kmmawb_u.h7
-rw-r--r--riscv/insns/kmmawt.h6
-rw-r--r--riscv/insns/kmmawt2.h14
-rw-r--r--riscv/insns/kmmawt2_u.h14
-rw-r--r--riscv/insns/kmmawt_u.h7
-rw-r--r--riscv/insns/kmmsb.h6
-rw-r--r--riscv/insns/kmmsb_u.h7
-rw-r--r--riscv/insns/kmmwb2.h9
-rw-r--r--riscv/insns/kmmwb2_u.h9
-rw-r--r--riscv/insns/kmmwt2.h9
-rw-r--r--riscv/insns/kmmwt2_u.h9
-rw-r--r--riscv/insns/kmsda.h3
-rw-r--r--riscv/insns/kmsda32.h9
-rw-r--r--riscv/insns/kmsr64.h25
-rw-r--r--riscv/insns/kmsxda.h3
-rw-r--r--riscv/insns/kmsxda32.h9
-rw-r--r--riscv/insns/kmxda.h3
-rw-r--r--riscv/insns/kmxda32.h9
-rw-r--r--riscv/insns/ksll16.h5
-rw-r--r--riscv/insns/ksll32.h6
-rw-r--r--riscv/insns/ksll8.h5
-rw-r--r--riscv/insns/kslli16.h5
-rw-r--r--riscv/insns/kslli32.h6
-rw-r--r--riscv/insns/kslli8.h5
-rw-r--r--riscv/insns/kslliw.h7
-rw-r--r--riscv/insns/ksllw.h7
-rw-r--r--riscv/insns/kslra16.h11
-rw-r--r--riscv/insns/kslra16_u.h14
-rw-r--r--riscv/insns/kslra32.h12
-rw-r--r--riscv/insns/kslra32_u.h15
-rw-r--r--riscv/insns/kslra8.h11
-rw-r--r--riscv/insns/kslra8_u.h14
-rw-r--r--riscv/insns/kslraw.h13
-rw-r--r--riscv/insns/kslraw_u.h13
-rw-r--r--riscv/insns/kstas16.h9
-rw-r--r--riscv/insns/kstas32.h10
-rw-r--r--riscv/insns/kstsa16.h9
-rw-r--r--riscv/insns/kstsa32.h10
-rw-r--r--riscv/insns/ksub16.h5
-rw-r--r--riscv/insns/ksub32.h6
-rw-r--r--riscv/insns/ksub64.h5
-rw-r--r--riscv/insns/ksub8.h5
-rw-r--r--riscv/insns/ksubh.h4
-rw-r--r--riscv/insns/ksubw.h4
-rw-r--r--riscv/insns/kwmmul.h9
-rw-r--r--riscv/insns/kwmmul_u.h9
-rw-r--r--riscv/insns/maddr32.h5
-rw-r--r--riscv/insns/maxw.h4
-rw-r--r--riscv/insns/minw.h4
-rw-r--r--riscv/insns/msubr32.h5
-rw-r--r--riscv/insns/mulr64.h3
-rw-r--r--riscv/insns/mulsr64.h3
-rw-r--r--riscv/insns/pbsad.h3
-rw-r--r--riscv/insns/pbsada.h3
-rw-r--r--riscv/insns/pkbb16.h1
-rw-r--r--riscv/insns/pkbb32.h2
-rw-r--r--riscv/insns/pkbt16.h1
-rw-r--r--riscv/insns/pkbt32.h2
-rw-r--r--riscv/insns/pktb16.h1
-rw-r--r--riscv/insns/pktb32.h2
-rw-r--r--riscv/insns/pktt16.h1
-rw-r--r--riscv/insns/pktt32.h2
-rw-r--r--riscv/insns/radd16.h3
-rw-r--r--riscv/insns/radd32.h4
-rw-r--r--riscv/insns/radd64.h8
-rw-r--r--riscv/insns/radd8.h3
-rw-r--r--riscv/insns/raddw.h4
-rw-r--r--riscv/insns/rcras16.h5
-rw-r--r--riscv/insns/rcras32.h6
-rw-r--r--riscv/insns/rcrsa16.h5
-rw-r--r--riscv/insns/rcrsa32.h6
-rw-r--r--riscv/insns/rstas16.h5
-rw-r--r--riscv/insns/rstas32.h6
-rw-r--r--riscv/insns/rstsa16.h5
-rw-r--r--riscv/insns/rstsa32.h6
-rw-r--r--riscv/insns/rsub16.h3
-rw-r--r--riscv/insns/rsub32.h4
-rw-r--r--riscv/insns/rsub64.h8
-rw-r--r--riscv/insns/rsub8.h3
-rw-r--r--riscv/insns/rsubw.h4
-rw-r--r--riscv/insns/sclip16.h13
-rw-r--r--riscv/insns/sclip32.h13
-rw-r--r--riscv/insns/sclip8.h13
-rw-r--r--riscv/insns/scmple16.h3
-rw-r--r--riscv/insns/scmple8.h3
-rw-r--r--riscv/insns/scmplt16.h3
-rw-r--r--riscv/insns/scmplt8.h3
-rw-r--r--riscv/insns/sll16.h3
-rw-r--r--riscv/insns/sll32.h4
-rw-r--r--riscv/insns/sll8.h3
-rw-r--r--riscv/insns/slli16.h3
-rw-r--r--riscv/insns/slli32.h4
-rw-r--r--riscv/insns/slli8.h3
-rw-r--r--riscv/insns/smal.h10
-rw-r--r--riscv/insns/smalbb.h3
-rw-r--r--riscv/insns/smalbt.h3
-rw-r--r--riscv/insns/smalda.h3
-rw-r--r--riscv/insns/smaldrs.h7
-rw-r--r--riscv/insns/smalds.h7
-rw-r--r--riscv/insns/smaltt.h3
-rw-r--r--riscv/insns/smalxda.h4
-rw-r--r--riscv/insns/smalxds.h4
-rw-r--r--riscv/insns/smaqa.h3
-rw-r--r--riscv/insns/smaqa_su.h3
-rw-r--r--riscv/insns/smar64.h3
-rw-r--r--riscv/insns/smax16.h3
-rw-r--r--riscv/insns/smax32.h3
-rw-r--r--riscv/insns/smax8.h3
-rw-r--r--riscv/insns/smbb16.h3
-rw-r--r--riscv/insns/smbt16.h3
-rw-r--r--riscv/insns/smbt32.h3
-rw-r--r--riscv/insns/smdrs.h6
-rw-r--r--riscv/insns/smdrs32.h7
-rw-r--r--riscv/insns/smds.h6
-rw-r--r--riscv/insns/smds32.h7
-rw-r--r--riscv/insns/smin16.h3
-rw-r--r--riscv/insns/smin32.h3
-rw-r--r--riscv/insns/smin8.h3
-rw-r--r--riscv/insns/smmul.h4
-rw-r--r--riscv/insns/smmul_u.h4
-rw-r--r--riscv/insns/smmwb.h4
-rw-r--r--riscv/insns/smmwb_u.h4
-rw-r--r--riscv/insns/smmwt.h4
-rw-r--r--riscv/insns/smmwt_u.h4
-rw-r--r--riscv/insns/smslda.h3
-rw-r--r--riscv/insns/smslxda.h4
-rw-r--r--riscv/insns/smsr64.h3
-rw-r--r--riscv/insns/smtt16.h3
-rw-r--r--riscv/insns/smtt32.h3
-rw-r--r--riscv/insns/smul16.h3
-rw-r--r--riscv/insns/smul8.h3
-rw-r--r--riscv/insns/smulx16.h3
-rw-r--r--riscv/insns/smulx8.h3
-rw-r--r--riscv/insns/smxds.h6
-rw-r--r--riscv/insns/smxds32.h7
-rw-r--r--riscv/insns/sra16.h3
-rw-r--r--riscv/insns/sra16_u.h6
-rw-r--r--riscv/insns/sra32.h4
-rw-r--r--riscv/insns/sra32_u.h7
-rw-r--r--riscv/insns/sra8.h3
-rw-r--r--riscv/insns/sra8_u.h6
-rw-r--r--riscv/insns/sra_u.h9
-rw-r--r--riscv/insns/srai16.h3
-rw-r--r--riscv/insns/srai16_u.h6
-rw-r--r--riscv/insns/srai32.h4
-rw-r--r--riscv/insns/srai32_u.h7
-rw-r--r--riscv/insns/srai8.h3
-rw-r--r--riscv/insns/srai8_u.h6
-rw-r--r--riscv/insns/srai_u.h9
-rw-r--r--riscv/insns/sraiw_u.h9
-rw-r--r--riscv/insns/srl16.h3
-rw-r--r--riscv/insns/srl16_u.h7
-rw-r--r--riscv/insns/srl32.h4
-rw-r--r--riscv/insns/srl32_u.h8
-rw-r--r--riscv/insns/srl8.h3
-rw-r--r--riscv/insns/srl8_u.h7
-rw-r--r--riscv/insns/srli16.h3
-rw-r--r--riscv/insns/srli16_u.h7
-rw-r--r--riscv/insns/srli32.h4
-rw-r--r--riscv/insns/srli32_u.h8
-rw-r--r--riscv/insns/srli8.h3
-rw-r--r--riscv/insns/srli8_u.h7
-rw-r--r--riscv/insns/stas16.h5
-rw-r--r--riscv/insns/stas32.h6
-rw-r--r--riscv/insns/stsa16.h5
-rw-r--r--riscv/insns/stsa32.h6
-rw-r--r--riscv/insns/sub16.h3
-rw-r--r--riscv/insns/sub32.h4
-rw-r--r--riscv/insns/sub64.h3
-rw-r--r--riscv/insns/sub8.h3
-rw-r--r--riscv/insns/sunpkd810.h1
-rw-r--r--riscv/insns/sunpkd820.h1
-rw-r--r--riscv/insns/sunpkd830.h1
-rw-r--r--riscv/insns/sunpkd831.h1
-rw-r--r--riscv/insns/sunpkd832.h1
-rw-r--r--riscv/insns/swap16.h4
-rw-r--r--riscv/insns/swap8.h4
-rw-r--r--riscv/insns/uclip16.h12
-rw-r--r--riscv/insns/uclip32.h12
-rw-r--r--riscv/insns/uclip8.h12
-rw-r--r--riscv/insns/ucmple16.h3
-rw-r--r--riscv/insns/ucmple8.h3
-rw-r--r--riscv/insns/ucmplt16.h3
-rw-r--r--riscv/insns/ucmplt8.h3
-rw-r--r--riscv/insns/ukadd16.h5
-rw-r--r--riscv/insns/ukadd32.h6
-rw-r--r--riscv/insns/ukadd64.h5
-rw-r--r--riscv/insns/ukadd8.h5
-rw-r--r--riscv/insns/ukaddh.h4
-rw-r--r--riscv/insns/ukaddw.h4
-rw-r--r--riscv/insns/ukcras16.h9
-rw-r--r--riscv/insns/ukcras32.h10
-rw-r--r--riscv/insns/ukcrsa16.h9
-rw-r--r--riscv/insns/ukcrsa32.h10
-rw-r--r--riscv/insns/ukmar64.h5
-rw-r--r--riscv/insns/ukmsr64.h5
-rw-r--r--riscv/insns/ukstas16.h9
-rw-r--r--riscv/insns/ukstas32.h10
-rw-r--r--riscv/insns/ukstsa16.h9
-rw-r--r--riscv/insns/ukstsa32.h10
-rw-r--r--riscv/insns/uksub16.h5
-rw-r--r--riscv/insns/uksub32.h6
-rw-r--r--riscv/insns/uksub64.h5
-rw-r--r--riscv/insns/uksub8.h5
-rw-r--r--riscv/insns/uksubh.h4
-rw-r--r--riscv/insns/uksubw.h4
-rw-r--r--riscv/insns/umaqa.h3
-rw-r--r--riscv/insns/umar64.h3
-rw-r--r--riscv/insns/umax16.h3
-rw-r--r--riscv/insns/umax32.h3
-rw-r--r--riscv/insns/umax8.h3
-rw-r--r--riscv/insns/umin16.h3
-rw-r--r--riscv/insns/umin32.h3
-rw-r--r--riscv/insns/umin8.h3
-rw-r--r--riscv/insns/umsr64.h3
-rw-r--r--riscv/insns/umul16.h3
-rw-r--r--riscv/insns/umul8.h3
-rw-r--r--riscv/insns/umulx16.h3
-rw-r--r--riscv/insns/umulx8.h3
-rw-r--r--riscv/insns/uradd16.h3
-rw-r--r--riscv/insns/uradd32.h4
-rw-r--r--riscv/insns/uradd64.h9
-rw-r--r--riscv/insns/uradd8.h3
-rw-r--r--riscv/insns/uraddw.h4
-rw-r--r--riscv/insns/urcras16.h5
-rw-r--r--riscv/insns/urcras32.h6
-rw-r--r--riscv/insns/urcrsa16.h5
-rw-r--r--riscv/insns/urcrsa32.h6
-rw-r--r--riscv/insns/urstas16.h5
-rw-r--r--riscv/insns/urstas32.h6
-rw-r--r--riscv/insns/urstsa16.h5
-rw-r--r--riscv/insns/urstsa32.h6
-rw-r--r--riscv/insns/ursub16.h3
-rw-r--r--riscv/insns/ursub32.h4
-rw-r--r--riscv/insns/ursub64.h9
-rw-r--r--riscv/insns/ursub8.h3
-rw-r--r--riscv/insns/ursubw.h4
-rw-r--r--riscv/insns/wext.h4
-rw-r--r--riscv/insns/wexti.h4
-rw-r--r--riscv/insns/zunpkd810.h1
-rw-r--r--riscv/insns/zunpkd820.h1
-rw-r--r--riscv/insns/zunpkd830.h1
-rw-r--r--riscv/insns/zunpkd831.h1
-rw-r--r--riscv/insns/zunpkd832.h1
-rw-r--r--riscv/processor.cc2
-rw-r--r--riscv/riscv.mk.in345
334 files changed, 4228 insertions, 1 deletions
diff --git a/README.md b/README.md
index 84b36bc..a89d244 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@ Spike supports the following RISC-V ISA features:
- B extension, v0.92
- K extension, v0.8.1 ([Scalar Cryptography](https://github.com/riscv/riscv-crypto))
- V extension, v0.10, w/ Zvlsseg/Zvamo (_requires a 64-bit host_)
+ - P extension, v0.9.2
- Bi-endianness
- Conformance to both RVWMO and RVTSO (Spike is sequentially consistent)
- Machine, Supervisor, and User modes, v1.11
diff --git a/disasm/disasm.cc b/disasm/disasm.cc
index c6d8520..5741c43 100644
--- a/disasm/disasm.cc
+++ b/disasm/disasm.cc
@@ -46,6 +46,12 @@ struct : public arg_t {
struct : public arg_t {
std::string to_string(insn_t insn) const {
+ return xpr_name[insn.rs3()];
+ }
+} xrs3;
+
+struct : public arg_t {
+ std::string to_string(insn_t insn) const {
return fpr_name[insn.rd()];
}
} frd;
@@ -377,6 +383,36 @@ struct : public arg_t {
}
} iorw;
+struct : public arg_t { // operand printer: value of insn.p_imm2() as a decimal string
+ std::string to_string(insn_t insn) const {
+ return std::to_string(static_cast<int>(insn.p_imm2()));
+ }
+} p_imm2;
+
+struct : public arg_t { // operand printer: value of insn.p_imm3() as a decimal string
+ std::string to_string(insn_t insn) const {
+ return std::to_string(static_cast<int>(insn.p_imm3()));
+ }
+} p_imm3;
+
+struct : public arg_t { // operand printer: value of insn.p_imm4() as a decimal string
+ std::string to_string(insn_t insn) const {
+ return std::to_string(static_cast<int>(insn.p_imm4()));
+ }
+} p_imm4;
+
+struct : public arg_t { // operand printer: value of insn.p_imm5() as a decimal string
+ std::string to_string(insn_t insn) const {
+ return std::to_string(static_cast<int>(insn.p_imm5()));
+ }
+} p_imm5;
+
+struct : public arg_t { // operand printer: value of insn.p_imm6() as a decimal string
+ std::string to_string(insn_t insn) const {
+ return std::to_string(static_cast<int>(insn.p_imm6()));
+ }
+} p_imm6;
+
typedef struct {
reg_t match;
reg_t mask;
@@ -1319,18 +1355,325 @@ disassembler_t::disassembler_t(int xlen)
}
}
+#define DEFINE_PITYTPE(code, immbit) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_imm##immbit}); // operands: rd, rs1, p_imm<immbit>
+#define DEFINE_ONEOP(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1}); // operands: rd, rs1
+
+#define DISASM_8_AND_16_RINSN(code) /* registers code8 and code16 through DEFINE_RTYPE */ \
+ DEFINE_RTYPE(code##8); \
+ DEFINE_RTYPE(code##16);
+
+#define DISASM_8_AND_16_RINSN_ROUND(code) /* registers code8_u / code16_u, printed as "code8.u" / "code16.u"; operands rd, rs1, rs2 */ \
+ DISASM_INSN(#code "8.u", code##8_u, 0, {&xrd, &xrs1, &xrs2}); \
+ DISASM_INSN(#code "16.u", code##16_u, 0, {&xrd, &xrs1, &xrs2});
+
+#define DISASM_8_AND_16_PIINSN(code) /* registers code8 with a 3-bit and code16 with a 4-bit immediate operand */ \
+ DEFINE_PITYTPE(code##8, 3); \
+ DEFINE_PITYTPE(code##16, 4);
+
+#define DISASM_8_AND_16_PIINSN_ROUND(code) /* ".u" immediate forms: code8.u uses p_imm3, code16.u uses p_imm4 */ \
+ DISASM_INSN(#code "8.u", code##8_u, 0, {&xrd, &xrs1, &p_imm3}); \
+ DISASM_INSN(#code "16.u", code##16_u, 0, {&xrd, &xrs1, &p_imm4});
+
+#define DISASM_RINSN_AND_ROUND(code) /* registers code via DEFINE_RTYPE plus code_u printed as "code.u" */ \
+ DEFINE_RTYPE(code); \
+ DISASM_INSN(#code ".u", code##_u, 0, {&xrd, &xrs1, &xrs2});
+
+ DISASM_8_AND_16_RINSN(add);
+ DISASM_8_AND_16_RINSN(radd);
+ DISASM_8_AND_16_RINSN(uradd);
+ DISASM_8_AND_16_RINSN(kadd);
+ DISASM_8_AND_16_RINSN(ukadd);
+ DISASM_8_AND_16_RINSN(sub);
+ DISASM_8_AND_16_RINSN(rsub);
+ DISASM_8_AND_16_RINSN(ursub);
+ DISASM_8_AND_16_RINSN(ksub);
+ DISASM_8_AND_16_RINSN(uksub);
+ DEFINE_RTYPE(cras16);
+ DEFINE_RTYPE(rcras16);
+ DEFINE_RTYPE(urcras16);
+ DEFINE_RTYPE(kcras16);
+ DEFINE_RTYPE(ukcras16);
+ DEFINE_RTYPE(crsa16);
+ DEFINE_RTYPE(rcrsa16);
+ DEFINE_RTYPE(urcrsa16);
+ DEFINE_RTYPE(kcrsa16);
+ DEFINE_RTYPE(ukcrsa16);
+ DEFINE_RTYPE(stas16);
+ DEFINE_RTYPE(rstas16);
+ DEFINE_RTYPE(urstas16);
+ DEFINE_RTYPE(kstas16);
+ DEFINE_RTYPE(ukstas16);
+ DEFINE_RTYPE(stsa16);
+ DEFINE_RTYPE(rstsa16);
+ DEFINE_RTYPE(urstsa16);
+ DEFINE_RTYPE(kstsa16);
+ DEFINE_RTYPE(ukstsa16);
+
+ DISASM_8_AND_16_RINSN(sra);
+ DISASM_8_AND_16_RINSN(srl);
+ DISASM_8_AND_16_RINSN(sll);
+ DISASM_8_AND_16_RINSN(ksll);
+ DISASM_8_AND_16_RINSN(kslra);
+ DISASM_8_AND_16_PIINSN(srai);
+ DISASM_8_AND_16_PIINSN(srli);
+ DISASM_8_AND_16_PIINSN(slli);
+ DISASM_8_AND_16_PIINSN(kslli);
+ DISASM_8_AND_16_RINSN_ROUND(sra);
+ DISASM_8_AND_16_RINSN_ROUND(srl);
+ DISASM_8_AND_16_RINSN_ROUND(kslra);
+ DISASM_8_AND_16_PIINSN_ROUND(srai);
+ DISASM_8_AND_16_PIINSN_ROUND(srli);
+
+ DISASM_8_AND_16_RINSN(cmpeq);
+ DISASM_8_AND_16_RINSN(scmplt);
+ DISASM_8_AND_16_RINSN(scmple);
+ DISASM_8_AND_16_RINSN(ucmplt);
+ DISASM_8_AND_16_RINSN(ucmple);
+
+ DISASM_8_AND_16_RINSN(smul);
+ DISASM_8_AND_16_RINSN(smulx);
+ DISASM_8_AND_16_RINSN(umul);
+ DISASM_8_AND_16_RINSN(umulx);
+ DISASM_8_AND_16_RINSN(khm);
+ DISASM_8_AND_16_RINSN(khmx);
+
+ DISASM_8_AND_16_RINSN(smin);
+ DISASM_8_AND_16_RINSN(umin);
+ DISASM_8_AND_16_RINSN(smax);
+ DISASM_8_AND_16_RINSN(umax);
+ DISASM_8_AND_16_PIINSN(sclip);
+ DISASM_8_AND_16_PIINSN(uclip);
+ DEFINE_ONEOP(kabs16);
+ DEFINE_ONEOP(clrs16);
+ DEFINE_ONEOP(clz16);
+ DEFINE_ONEOP(clo16);
+ DEFINE_ONEOP(swap16);
+ DEFINE_ONEOP(kabs8);
+ DEFINE_ONEOP(clrs8);
+ DEFINE_ONEOP(clz8);
+ DEFINE_ONEOP(clo8);
+ DEFINE_ONEOP(swap8);
+
+ DEFINE_ONEOP(sunpkd810);
+ DEFINE_ONEOP(sunpkd820);
+ DEFINE_ONEOP(sunpkd830);
+ DEFINE_ONEOP(sunpkd831);
+ DEFINE_ONEOP(sunpkd832);
+ DEFINE_ONEOP(zunpkd810);
+ DEFINE_ONEOP(zunpkd820);
+ DEFINE_ONEOP(zunpkd830);
+ DEFINE_ONEOP(zunpkd831);
+ DEFINE_ONEOP(zunpkd832);
+
+ DEFINE_RTYPE(pkbb16);
+ DEFINE_RTYPE(pkbt16);
+ DEFINE_RTYPE(pktb16);
+ DEFINE_RTYPE(pktt16);
+ DISASM_RINSN_AND_ROUND(smmul);
+ DISASM_RINSN_AND_ROUND(kmmac);
+ DISASM_RINSN_AND_ROUND(kmmsb);
+ DISASM_RINSN_AND_ROUND(kwmmul);
+ DISASM_RINSN_AND_ROUND(smmwb);
+ DISASM_RINSN_AND_ROUND(smmwt);
+ DISASM_RINSN_AND_ROUND(kmmawb);
+ DISASM_RINSN_AND_ROUND(kmmawt);
+ DISASM_RINSN_AND_ROUND(kmmwb2);
+ DISASM_RINSN_AND_ROUND(kmmwt2);
+ DISASM_RINSN_AND_ROUND(kmmawb2);
+ DISASM_RINSN_AND_ROUND(kmmawt2);
+ DEFINE_RTYPE(smbb16)
+ DEFINE_RTYPE(smbt16)
+ DEFINE_RTYPE(smtt16)
+ DEFINE_RTYPE(kmda)
+ DEFINE_RTYPE(kmxda)
+ DEFINE_RTYPE(smds)
+ DEFINE_RTYPE(smdrs)
+ DEFINE_RTYPE(smxds)
+ DEFINE_RTYPE(kmabb)
+ DEFINE_RTYPE(kmabt)
+ DEFINE_RTYPE(kmatt)
+ DEFINE_RTYPE(kmada)
+ DEFINE_RTYPE(kmaxda)
+ DEFINE_RTYPE(kmads)
+ DEFINE_RTYPE(kmadrs)
+ DEFINE_RTYPE(kmaxds)
+ DEFINE_RTYPE(kmsda)
+ DEFINE_RTYPE(kmsxda)
+ DEFINE_RTYPE(smal)
+ DEFINE_PITYTPE(sclip32, 5); // third operand is a 5-bit immediate, not rs2
+ DEFINE_PITYTPE(uclip32, 5); // third operand is a 5-bit immediate, not rs2
+ DEFINE_ONEOP(clrs32);
+ DEFINE_ONEOP(clz32);
+ DEFINE_ONEOP(clo32);
+ DEFINE_RTYPE(pbsad);
+ DEFINE_RTYPE(pbsada);
+ DEFINE_RTYPE(smaqa);
+ DEFINE_RTYPE(umaqa);
+ DISASM_INSN("smaqa.su", smaqa_su, 0, {&xrd, &xrs1, &xrs2});
+
+ DEFINE_RTYPE(add64);
+ DEFINE_RTYPE(radd64);
+ DEFINE_RTYPE(uradd64);
+ DEFINE_RTYPE(kadd64);
+ DEFINE_RTYPE(ukadd64);
+ DEFINE_RTYPE(sub64);
+ DEFINE_RTYPE(rsub64);
+ DEFINE_RTYPE(ursub64);
+ DEFINE_RTYPE(ksub64);
+ DEFINE_RTYPE(uksub64);
+ DEFINE_RTYPE(smar64);
+ DEFINE_RTYPE(smsr64);
+ DEFINE_RTYPE(umar64);
+ DEFINE_RTYPE(umsr64);
+ DEFINE_RTYPE(kmar64);
+ DEFINE_RTYPE(kmsr64);
+ DEFINE_RTYPE(ukmar64);
+ DEFINE_RTYPE(ukmsr64);
+ DEFINE_RTYPE(smalbb);
+ DEFINE_RTYPE(smalbt);
+ DEFINE_RTYPE(smaltt);
+ DEFINE_RTYPE(smalda);
+ DEFINE_RTYPE(smalxda);
+ DEFINE_RTYPE(smalds);
+ DEFINE_RTYPE(smaldrs);
+ DEFINE_RTYPE(smalxds);
+ DEFINE_RTYPE(smslda);
+ DEFINE_RTYPE(smslxda);
+
+ DEFINE_RTYPE(kaddh);
+ DEFINE_RTYPE(ksubh);
+ DEFINE_RTYPE(khmbb);
+ DEFINE_RTYPE(khmbt);
+ DEFINE_RTYPE(khmtt);
+ DEFINE_RTYPE(ukaddh);
+ DEFINE_RTYPE(uksubh);
+ DEFINE_RTYPE(kaddw);
+ DEFINE_RTYPE(ukaddw);
+ DEFINE_RTYPE(ksubw);
+ DEFINE_RTYPE(uksubw);
+ DEFINE_RTYPE(kdmbb);
+ DEFINE_RTYPE(kdmbt);
+ DEFINE_RTYPE(kdmtt);
+ DEFINE_RTYPE(kslraw);
+ DISASM_INSN("kslraw.u", kslraw_u, 0, {&xrd, &xrs1, &xrs2});
+ DEFINE_RTYPE(ksllw);
+ DEFINE_PITYTPE(kslliw, 5);
+ DEFINE_RTYPE(kdmabb);
+ DEFINE_RTYPE(kdmabt);
+ DEFINE_RTYPE(kdmatt);
+ DEFINE_ONEOP(kabsw); // single-source instruction: printed as "kabsw rd, rs1"
+ DEFINE_RTYPE(raddw);
+ DEFINE_RTYPE(uraddw);
+ DEFINE_RTYPE(rsubw);
+ DEFINE_RTYPE(ursubw);
+ DEFINE_RTYPE(maxw);
+ DEFINE_RTYPE(minw);
+ DEFINE_RTYPE(mulr64);
+ DEFINE_RTYPE(mulsr64);
+ DEFINE_RTYPE(msubr32);
+ DEFINE_RTYPE(ave);
+ DISASM_INSN("sra.u", sra_u, 0, {&xrd, &xrs1, &xrs2});
+ DISASM_INSN("srai.u", srai_u, 0, {&xrd, &xrs1, &p_imm5});
+ DEFINE_RTYPE(bitrev);
+ DEFINE_RTYPE(wext);
+ DEFINE_PITYTPE(wexti, 5);
+ DISASM_INSN("bpick", bpick, 0, {&xrd, &xrs1, &xrs2, &xrs3});
+ DEFINE_PITYTPE(insb, 3);
+ DEFINE_RTYPE(maddr32)
+
if (xlen == 32) {
DISASM_INSN("c.flw", c_flw, 0, {&rvc_fp_rs2s, &rvc_lw_address});
DISASM_INSN("c.flwsp", c_flwsp, 0, {&frd, &rvc_lwsp_address});
DISASM_INSN("c.fsw", c_fsw, 0, {&rvc_fp_rs2s, &rvc_lw_address});
DISASM_INSN("c.fswsp", c_fswsp, 0, {&rvc_fp_rs2, &rvc_swsp_address});
DISASM_INSN("c.jal", c_jal, 0, {&rvc_jump_target});
+ DEFINE_PITYTPE(bitrevi, 5);
} else {
DISASM_INSN("c.ld", c_ld, 0, {&rvc_rs2s, &rvc_ld_address});
DISASM_INSN("c.ldsp", c_ldsp, 0, {&xrd, &rvc_ldsp_address});
DISASM_INSN("c.sd", c_sd, 0, {&rvc_rs2s, &rvc_ld_address});
DISASM_INSN("c.sdsp", c_sdsp, 0, {&rvc_rs2, &rvc_sdsp_address});
DISASM_INSN("c.addiw", c_addiw, 0, {&xrd, &rvc_imm});
+ DEFINE_PITYTPE(bitrevi, 6);
+ DEFINE_RTYPE(add32);
+ DEFINE_RTYPE(radd32);
+ DEFINE_RTYPE(uradd32);
+ DEFINE_RTYPE(kadd32);
+ DEFINE_RTYPE(ukadd32);
+ DEFINE_RTYPE(sub32);
+ DEFINE_RTYPE(rsub32);
+ DEFINE_RTYPE(ursub32);
+ DEFINE_RTYPE(ksub32);
+ DEFINE_RTYPE(uksub32);
+ DEFINE_RTYPE(cras32);
+ DEFINE_RTYPE(rcras32);
+ DEFINE_RTYPE(urcras32);
+ DEFINE_RTYPE(kcras32);
+ DEFINE_RTYPE(ukcras32);
+ DEFINE_RTYPE(crsa32);
+ DEFINE_RTYPE(rcrsa32);
+ DEFINE_RTYPE(urcrsa32);
+ DEFINE_RTYPE(kcrsa32);
+ DEFINE_RTYPE(ukcrsa32);
+ DEFINE_RTYPE(stas32);
+ DEFINE_RTYPE(rstas32);
+ DEFINE_RTYPE(urstas32);
+ DEFINE_RTYPE(kstas32);
+ DEFINE_RTYPE(ukstas32);
+ DEFINE_RTYPE(stsa32);
+ DEFINE_RTYPE(rstsa32);
+ DEFINE_RTYPE(urstsa32);
+ DEFINE_RTYPE(kstsa32);
+ DEFINE_RTYPE(ukstsa32);
+ DEFINE_RTYPE(sra32);
+ DEFINE_PITYTPE(srai32, 5);
+ DISASM_INSN("sra32.u", sra32_u, 0, {&xrd, &xrs1, &xrs2});
+ DISASM_INSN("srai32.u", srai32_u, 0, {&xrd, &xrs1, &p_imm5});
+ DEFINE_RTYPE(srl32);
+ DEFINE_PITYTPE(srli32, 5);
+ DISASM_INSN("srl32.u", srl32_u, 0, {&xrd, &xrs1, &xrs2});
+ DISASM_INSN("srli32.u", srli32_u, 0, {&xrd, &xrs1, &p_imm5});
+ DEFINE_RTYPE(sll32);
+ DEFINE_PITYTPE(slli32, 5);
+ DEFINE_RTYPE(ksll32);
+ DEFINE_PITYTPE(kslli32, 5);
+ DEFINE_RTYPE(kslra32);
+ DISASM_INSN("kslra32.u", kslra32_u, 0, {&xrd, &xrs1, &xrs2});
+ DEFINE_RTYPE(smin32);
+ DEFINE_RTYPE(umin32);
+ DEFINE_RTYPE(smax32);
+ DEFINE_RTYPE(umax32);
+ DEFINE_ONEOP(kabs32);
+ DEFINE_RTYPE(khmbb16);
+ DEFINE_RTYPE(khmbt16);
+ DEFINE_RTYPE(khmtt16);
+ DEFINE_RTYPE(kdmbb16);
+ DEFINE_RTYPE(kdmbt16);
+ DEFINE_RTYPE(kdmtt16);
+ DEFINE_RTYPE(kdmabb16);
+ DEFINE_RTYPE(kdmabt16);
+ DEFINE_RTYPE(kdmatt16);
+ DEFINE_RTYPE(smbt32);
+ DEFINE_RTYPE(smtt32);
+ DEFINE_RTYPE(kmabb32);
+ DEFINE_RTYPE(kmabt32);
+ DEFINE_RTYPE(kmatt32);
+ DEFINE_RTYPE(kmda32);
+ DEFINE_RTYPE(kmxda32);
+ DEFINE_RTYPE(kmaxda32);
+ DEFINE_RTYPE(kmads32);
+ DEFINE_RTYPE(kmadrs32);
+ DEFINE_RTYPE(kmaxds32);
+ DEFINE_RTYPE(kmsda32);
+ DEFINE_RTYPE(kmsxda32);
+ DEFINE_RTYPE(smds32);
+ DEFINE_RTYPE(smdrs32);
+ DEFINE_RTYPE(smxds32);
+ DISASM_INSN("sraiw.u", sraiw_u, 0, {&xrd, &xrs1, &p_imm5});
+ DEFINE_RTYPE(pkbb32);
+ DEFINE_RTYPE(pkbt32);
+ DEFINE_RTYPE(pktb32);
+ DEFINE_RTYPE(pktt32);
}
// provide a default disassembly for all instructions as a fallback
diff --git a/riscv/arith.h b/riscv/arith.h
index 398217e..9e0c2f7 100644
--- a/riscv/arith.h
+++ b/riscv/arith.h
@@ -66,6 +66,27 @@ static inline T sat_add(T x, T y, bool &sat)
}
template<typename T, typename UT>
+static inline T sat_add(T x, T y, T z, bool &sat)
+{
+ // Saturating x + y + z; sat is set when either partial sum saturated.
+ bool first_sat, second_sat;
+
+ // When z's sign differs from both x's and y's, z is added before y so the
+ // first partial sum combines operands of opposite sign. Written as selects
+ // (the original author's note says a conditional-move is intended here).
+ const bool z_first = ((y ^ z) & (x ^ z)) < 0;
+ const T first = z_first ? z : y;
+ const T second = z_first ? y : z;
+
+ T total = sat_add<T, UT>(x, first, first_sat);
+ total = sat_add<T, UT>(total, second, second_sat);
+ sat = first_sat || second_sat;
+
+ return total;
+}
+
+
+template<typename T, typename UT>
static inline T sat_sub(T x, T y, bool &sat)
{
UT ux = x;
diff --git a/riscv/decode.h b/riscv/decode.h
index 72ad286..6591612 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -67,6 +67,7 @@ const int NCSR = 4096;
(((x) & 0x03) < 0x03 ? 2 : \
((x) & 0x1f) < 0x1f ? 4 : \
((x) & 0x3f) < 0x3f ? 6 : \
+ ((x) & 0x7f) == 0x7f ? 4 : \
8)
#define MAX_INSN_LENGTH 8
#define PC_ALIGN 2
@@ -132,6 +133,12 @@ public:
uint64_t v_vma() { return x(27, 1); }
uint64_t v_mew() { return x(28, 1); }
+ uint64_t p_imm2() { return x(20, 2); } // 2-bit field starting at instruction bit 20
+ uint64_t p_imm3() { return x(20, 3); } // 3-bit field starting at instruction bit 20
+ uint64_t p_imm4() { return x(20, 4); } // 4-bit field starting at instruction bit 20
+ uint64_t p_imm5() { return x(20, 5); } // 5-bit field starting at instruction bit 20
+ uint64_t p_imm6() { return x(20, 6); } // 6-bit field starting at instruction bit 20
+
private:
insn_bits_t b;
uint64_t x(int lo, int len) { return (b >> lo) & ((insn_bits_t(1) << len)-1); }
@@ -2384,6 +2391,508 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
break; \
}
+// The p-extension support is contributed by
+// Programming Language Lab, Department of Computer Science, National Tsing-Hua University, Taiwan
+
+#define P_FIELD(R, INDEX, SIZE) \
+ (type_sew_t<SIZE>::type)get_field(R, make_mask64(((INDEX) * SIZE), SIZE))
+
+#define P_UFIELD(R, INDEX, SIZE) \
+ (type_usew_t<SIZE>::type)get_field(R, make_mask64(((INDEX) * SIZE), SIZE))
+
+#define P_B(R, INDEX) P_UFIELD(R, INDEX, 8)
+#define P_H(R, INDEX) P_UFIELD(R, INDEX, 16)
+#define P_W(R, INDEX) P_UFIELD(R, INDEX, 32)
+#define P_SB(R, INDEX) P_FIELD(R, INDEX, 8)
+#define P_SH(R, INDEX) P_FIELD(R, INDEX, 16)
+#define P_SW(R, INDEX) P_FIELD(R, INDEX, 32)
+
+// Read a 64-bit value from the register pair (reg, reg + 1). Each register
+// is passed through zext32() before the halves are combined; the target's
+// endianness selects which register supplies the upper 32 bits (reg when
+// big-endian, reg + 1 otherwise).
+// The whole expansion and the `reg` argument are parenthesized so the macro
+// can be used inside larger expressions without precedence surprises.
+// `reg` is evaluated more than once; pass a side-effect-free expression.
+#define READ_REG_PAIR(reg) \
+ (MMU.is_target_big_endian() \
+ ? ((zext32(READ_REG(reg)) << 32) + zext32(READ_REG((reg) + 1))) \
+ : ((zext32(READ_REG((reg) + 1)) << 32) + zext32(READ_REG(reg))))
+
+#define RS1_PAIR READ_REG_PAIR(insn.rs1())
+#define RS2_PAIR READ_REG_PAIR(insn.rs2())
+#define RD_PAIR READ_REG_PAIR(insn.rd())
+
+#define WRITE_PD() \
+ rd_tmp = set_field(rd_tmp, make_mask64((i * sizeof(pd) * 8), sizeof(pd) * 8), pd);
+
+// Write a 64-bit value to the register pair (rd, rd + 1). One register
+// receives sext32(value), the other receives the value arithmetically
+// shifted right by 32; the target's endianness selects which is which
+// (rd gets the upper half when big-endian, the lower half otherwise).
+// `value` is parenthesized under the cast so that an expression argument
+// such as `a + b` is cast and shifted as a whole.
+// `value` is evaluated more than once; pass a side-effect-free expression.
+#define WRITE_RD_PAIR(value) \
+ if (MMU.is_target_big_endian()) { \
+ WRITE_REG(insn.rd() + 1, sext32(value)); \
+ WRITE_REG(insn.rd(), ((sreg_t)(value)) >> 32); \
+ } else { \
+ WRITE_REG(insn.rd(), sext32(value)); \
+ WRITE_REG(insn.rd() + 1, ((sreg_t)(value)) >> 32); \
+ }
+
+#define P_SET_OV(ov) \
+ P.VU.vxsat |= ov;
+
+#define P_SAT(R, BIT) \
+ if (R > INT##BIT##_MAX) { \
+ R = INT##BIT##_MAX; \
+ P_SET_OV(1); \
+ } else if (R < INT##BIT##_MIN) { \
+ R = INT##BIT##_MIN; \
+ P_SET_OV(1); \
+ }
+
+#define P_SATU(R, BIT) \
+ if (R > UINT##BIT##_MAX) { \
+ R = UINT##BIT##_MAX; \
+ P_SET_OV(1); \
+ } else if (R < 0) { \
+ P_SET_OV(1); \
+ R = 0; \
+ }
+
+#define P_LOOP_BASE(BIT) \
+ require_extension('P'); \
+ require(BIT == e8 || BIT == e16 || BIT == e32); \
+ reg_t rd_tmp = RD; \
+ reg_t rs1 = RS1; \
+ reg_t rs2 = RS2; \
+ sreg_t len = xlen / BIT; \
+ for (sreg_t i = len - 1; i >= 0; --i) {
+
+#define P_ONE_LOOP_BASE(BIT) \
+ require_extension('P'); \
+ require(BIT == e8 || BIT == e16 || BIT == e32); \
+ reg_t rd_tmp = RD; \
+ reg_t rs1 = RS1; \
+ sreg_t len = xlen / BIT; \
+ for (sreg_t i = len - 1; i >= 0; --i) {
+
+#define P_I_LOOP_BASE(BIT, IMMBIT) \
+ require_extension('P'); \
+ require(BIT == e8 || BIT == e16 || BIT == e32); \
+ reg_t rd_tmp = RD; \
+ reg_t rs1 = RS1; \
+ type_usew_t<BIT>::type imm##IMMBIT##u = insn.p_imm##IMMBIT(); \
+ sreg_t len = xlen / BIT; \
+ for (sreg_t i = len - 1; i >= 0; --i) {
+
+// Loop prologue for SIMD operations whose second operand is a scalar taken
+// from the low LOWBIT bits of RS2 (e.g. a shift amount):
+//   sa  - those LOWBIT bits as an unsigned value
+//   ssa - the same bits sign-extended from bit LOWBIT-1
+// It then opens a loop over the BIT-wide lanes of an xlen-wide register,
+// from the highest lane down to lane 0; the loop is closed by P_LOOP_END().
+// The sign extension shifts left in the unsigned domain and only then
+// converts to int64_t for the arithmetic right shift, because left-shifting
+// a negative signed value is undefined behaviour before C++20.
+#define P_X_LOOP_BASE(BIT, LOWBIT) \
+ require_extension('P'); \
+ require(BIT == e8 || BIT == e16 || BIT == e32); \
+ reg_t rd_tmp = RD; \
+ reg_t rs1 = RS1; \
+ type_usew_t<BIT>::type sa = RS2 & ((uint64_t(1) << (LOWBIT)) - 1); \
+ type_sew_t<BIT>::type ssa = int64_t(uint64_t(RS2) << (64 - (LOWBIT))) >> (64 - (LOWBIT)); \
+ sreg_t len = xlen / BIT; \
+ for (sreg_t i = len - 1; i >= 0; --i) {
+
+#define P_MUL_LOOP_BASE(BIT) \
+ require_extension('P'); \
+ require(BIT == e8 || BIT == e16 || BIT == e32); \
+ reg_t rd_tmp = RD; \
+ reg_t rs1 = RS1; \
+ reg_t rs2 = RS2; \
+ sreg_t len = 32 / BIT; \
+ for (sreg_t i = len - 1; i >= 0; --i) {
+
+#define P_REDUCTION_LOOP_BASE(BIT, BIT_INNER, USE_RD) \
+ require_extension('P'); \
+ require(BIT == e16 || BIT == e32 || BIT == e64); \
+ reg_t rd_tmp = USE_RD ? zext_xlen(RD) : 0; \
+ reg_t rs1 = zext_xlen(RS1); \
+ reg_t rs2 = zext_xlen(RS2); \
+ sreg_t len = 64 / BIT; \
+ sreg_t len_inner = BIT / BIT_INNER; \
+ for (sreg_t i = len - 1; i >= 0; --i) { \
+ sreg_t pd_res = P_FIELD(rd_tmp, i, BIT); \
+ for (sreg_t j = i * len_inner; j < (i + 1) * len_inner; ++j) {
+
+#define P_REDUCTION_ULOOP_BASE(BIT, BIT_INNER, USE_RD) \
+ require_extension('P'); \
+ require(BIT == e16 || BIT == e32 || BIT == e64); \
+ reg_t rd_tmp = USE_RD ? zext_xlen(RD) : 0; \
+ reg_t rs1 = zext_xlen(RS1); \
+ reg_t rs2 = zext_xlen(RS2); \
+ sreg_t len = 64 / BIT; \
+ sreg_t len_inner = BIT / BIT_INNER; \
+ for (sreg_t i = len - 1; i >=0; --i) { \
+ reg_t pd_res = P_UFIELD(rd_tmp, i, BIT); \
+ for (sreg_t j = i * len_inner; j < (i + 1) * len_inner; ++j) {
+
+#define P_PARAMS(BIT) \
+ auto pd = P_FIELD(rd_tmp, i, BIT); \
+ auto ps1 = P_FIELD(rs1, i, BIT); \
+ auto ps2 = P_FIELD(rs2, i, BIT);
+
+#define P_UPARAMS(BIT) \
+ auto pd = P_UFIELD(rd_tmp, i, BIT); \
+ auto ps1 = P_UFIELD(rs1, i, BIT); \
+ auto ps2 = P_UFIELD(rs2, i, BIT);
+
+// Lane operands for "cross" SIMD operations, using the signed lane type:
+// pd and ps1 are lane i of rd_tmp and rs1, while ps2 is lane (i ^ 1) of rs2,
+// i.e. the other lane of the same even/odd pair.
+#define P_CROSS_PARAMS(BIT) \
+ auto pd = P_FIELD(rd_tmp, i, BIT); \
+ auto ps1 = P_FIELD(rs1, i, BIT); \
+ auto ps2 = P_FIELD(rs2, (i ^ 1), BIT);
+
+// Original, misspelled name; kept as an alias so existing users still build.
+#define P_CORSS_PARAMS(BIT) P_CROSS_PARAMS(BIT)
+
+// Lane operands for "cross" SIMD operations, using the unsigned lane type:
+// pd and ps1 are lane i of rd_tmp and rs1, while ps2 is lane (i ^ 1) of rs2,
+// i.e. the other lane of the same even/odd pair.
+#define P_CROSS_UPARAMS(BIT) \
+ auto pd = P_UFIELD(rd_tmp, i, BIT); \
+ auto ps1 = P_UFIELD(rs1, i, BIT); \
+ auto ps2 = P_UFIELD(rs2, (i ^ 1), BIT);
+
+// Original, misspelled name; kept as an alias so existing users still build.
+#define P_CORSS_UPARAMS(BIT) P_CROSS_UPARAMS(BIT)
+
+#define P_ONE_PARAMS(BIT) \
+ auto pd = P_FIELD(rd_tmp, i, BIT); \
+ auto ps1 = P_FIELD(rs1, i, BIT);
+
+#define P_ONE_UPARAMS(BIT) \
+ auto pd = P_UFIELD(rd_tmp, i, BIT); \
+ auto ps1 = P_UFIELD(rs1, i, BIT);
+
+#define P_ONE_SUPARAMS(BIT) \
+ auto pd = P_UFIELD(rd_tmp, i, BIT); \
+ auto ps1 = P_FIELD(rs1, i, BIT);
+
+#define P_MUL_PARAMS(BIT) \
+ auto pd = P_FIELD(rd_tmp, i, BIT * 2); \
+ auto ps1 = P_FIELD(rs1, i, BIT); \
+ auto ps2 = P_FIELD(rs2, i, BIT);
+
+#define P_MUL_UPARAMS(BIT) \
+ auto pd = P_UFIELD(rd_tmp, i, BIT * 2); \
+ auto ps1 = P_UFIELD(rs1, i, BIT); \
+ auto ps2 = P_UFIELD(rs2, i, BIT);
+
+#define P_MUL_CROSS_PARAMS(BIT) \
+ auto pd = P_FIELD(rd_tmp, i, BIT * 2); \
+ auto ps1 = P_FIELD(rs1, i, BIT); \
+ auto ps2 = P_FIELD(rs2, (i ^ 1), BIT);
+
+#define P_MUL_CROSS_UPARAMS(BIT) \
+ auto pd = P_UFIELD(rd_tmp, i, BIT*2); \
+ auto ps1 = P_UFIELD(rs1, i, BIT); \
+ auto ps2 = P_UFIELD(rs2, (i ^ 1), BIT);
+
+#define P_REDUCTION_PARAMS(BIT_INNER) \
+ auto ps1 = P_FIELD(rs1, j, BIT_INNER); \
+ auto ps2 = P_FIELD(rs2, j, BIT_INNER);
+
+#define P_REDUCTION_UPARAMS(BIT_INNER) \
+ auto ps1 = P_UFIELD(rs1, j, BIT_INNER); \
+ auto ps2 = P_UFIELD(rs2, j, BIT_INNER);
+
+#define P_REDUCTION_SUPARAMS(BIT_INNER) \
+ auto ps1 = P_FIELD(rs1, j, BIT_INNER); \
+ auto ps2 = P_UFIELD(rs2, j, BIT_INNER);
+
+#define P_REDUCTION_CROSS_PARAMS(BIT_INNER) \
+ auto ps1 = P_FIELD(rs1, j, BIT_INNER); \
+ auto ps2 = P_FIELD(rs2, (j ^ 1), BIT_INNER);
+
+#define P_LOOP_BODY(BIT, BODY) { \
+ P_PARAMS(BIT) \
+ BODY \
+ WRITE_PD(); \
+}
+
+#define P_ULOOP_BODY(BIT, BODY) { \
+ P_UPARAMS(BIT) \
+ BODY \
+ WRITE_PD(); \
+}
+
+#define P_ONE_LOOP_BODY(BIT, BODY) { \
+ P_ONE_PARAMS(BIT) \
+ BODY \
+ WRITE_PD(); \
+}
+
+#define P_CROSS_LOOP_BODY(BIT, BODY) { \
+ P_CORSS_PARAMS(BIT) \
+ BODY \
+ WRITE_PD(); \
+}
+
+#define P_CROSS_ULOOP_BODY(BIT, BODY) { \
+ P_CORSS_UPARAMS(BIT) \
+ BODY \
+ WRITE_PD(); \
+}
+
+#define P_ONE_ULOOP_BODY(BIT, BODY) { \
+ P_ONE_UPARAMS(BIT) \
+ BODY \
+ WRITE_PD(); \
+}
+
+#define P_MUL_LOOP_BODY(BIT, BODY) { \
+ P_MUL_PARAMS(BIT) \
+ BODY \
+ WRITE_PD(); \
+}
+
+#define P_MUL_ULOOP_BODY(BIT, BODY) { \
+ P_MUL_UPARAMS(BIT) \
+ BODY \
+ WRITE_PD(); \
+}
+
+#define P_MUL_CROSS_LOOP_BODY(BIT, BODY) { \
+ P_MUL_CROSS_PARAMS(BIT) \
+ BODY \
+ WRITE_PD(); \
+}
+
+#define P_MUL_CROSS_ULOOP_BODY(BIT, BODY) { \
+ P_MUL_CROSS_UPARAMS(BIT) \
+ BODY \
+ WRITE_PD(); \
+}
+
+#define P_LOOP(BIT, BODY) \
+ P_LOOP_BASE(BIT) \
+ P_LOOP_BODY(BIT, BODY) \
+ P_LOOP_END()
+
+#define P_ONE_LOOP(BIT, BODY) \
+ P_ONE_LOOP_BASE(BIT) \
+ P_ONE_LOOP_BODY(BIT, BODY) \
+ P_LOOP_END()
+
+#define P_ULOOP(BIT, BODY) \
+ P_LOOP_BASE(BIT) \
+ P_ULOOP_BODY(BIT, BODY) \
+ P_LOOP_END()
+
+#define P_CROSS_LOOP(BIT, BODY1, BODY2) \
+ P_LOOP_BASE(BIT) \
+ P_CROSS_LOOP_BODY(BIT, BODY1) \
+ --i; \
+ if (sizeof(#BODY2) == 1) { \
+ P_CROSS_LOOP_BODY(BIT, BODY1) \
+ } \
+ else { \
+ P_CROSS_LOOP_BODY(BIT, BODY2) \
+ } \
+ P_LOOP_END()
+
+#define P_CROSS_ULOOP(BIT, BODY1, BODY2) \
+ P_LOOP_BASE(BIT) \
+ P_CROSS_ULOOP_BODY(BIT, BODY1) \
+ --i; \
+ P_CROSS_ULOOP_BODY(BIT, BODY2) \
+ P_LOOP_END()
+
+#define P_STRAIGHT_LOOP(BIT, BODY1, BODY2) \
+ P_LOOP_BASE(BIT) \
+ P_LOOP_BODY(BIT, BODY1) \
+ --i; \
+ P_LOOP_BODY(BIT, BODY2) \
+ P_LOOP_END()
+
+#define P_STRAIGHT_ULOOP(BIT, BODY1, BODY2) \
+ P_LOOP_BASE(BIT) \
+ P_ULOOP_BODY(BIT, BODY1) \
+ --i; \
+ P_ULOOP_BODY(BIT, BODY2) \
+ P_LOOP_END()
+
+#define P_X_LOOP(BIT, RS2_LOW_BIT, BODY) \
+ P_X_LOOP_BASE(BIT, RS2_LOW_BIT) \
+ P_ONE_LOOP_BODY(BIT, BODY) \
+ P_LOOP_END()
+
+#define P_X_ULOOP(BIT, RS2_LOW_BIT, BODY) \
+ P_X_LOOP_BASE(BIT, RS2_LOW_BIT) \
+ P_ONE_ULOOP_BODY(BIT, BODY) \
+ P_LOOP_END()
+
+#define P_I_LOOP(BIT, IMMBIT, BODY) \
+ P_I_LOOP_BASE(BIT, IMMBIT) \
+ P_ONE_LOOP_BODY(BIT, BODY) \
+ P_LOOP_END()
+
+#define P_I_ULOOP(BIT, IMMBIT, BODY) \
+ P_I_LOOP_BASE(BIT, IMMBIT) \
+ P_ONE_ULOOP_BODY(BIT, BODY) \
+ P_LOOP_END()
+
+#define P_MUL_LOOP(BIT, BODY) \
+ P_MUL_LOOP_BASE(BIT) \
+ P_MUL_LOOP_BODY(BIT, BODY) \
+ P_PAIR_LOOP_END()
+
+#define P_MUL_ULOOP(BIT, BODY) \
+ P_MUL_LOOP_BASE(BIT) \
+ P_MUL_ULOOP_BODY(BIT, BODY) \
+ P_PAIR_LOOP_END()
+
+#define P_MUL_CROSS_LOOP(BIT, BODY) \
+ P_MUL_LOOP_BASE(BIT) \
+ P_MUL_CROSS_LOOP_BODY(BIT, BODY) \
+ P_PAIR_LOOP_END()
+
+#define P_MUL_CROSS_ULOOP(BIT, BODY) \
+ P_MUL_LOOP_BASE(BIT) \
+ P_MUL_CROSS_ULOOP_BODY(BIT, BODY) \
+ P_PAIR_LOOP_END()
+
+#define P_REDUCTION_LOOP(BIT, BIT_INNER, USE_RD, IS_SAT, BODY) \
+ P_REDUCTION_LOOP_BASE(BIT, BIT_INNER, USE_RD) \
+ P_REDUCTION_PARAMS(BIT_INNER) \
+ BODY \
+ P_REDUCTION_LOOP_END(BIT, IS_SAT)
+
+#define P_REDUCTION_ULOOP(BIT, BIT_INNER, USE_RD, IS_SAT, BODY) \
+ P_REDUCTION_ULOOP_BASE(BIT, BIT_INNER, USE_RD) \
+ P_REDUCTION_UPARAMS(BIT_INNER) \
+ BODY \
+ P_REDUCTION_ULOOP_END(BIT, IS_SAT)
+
+#define P_REDUCTION_SULOOP(BIT, BIT_INNER, USE_RD, IS_SAT, BODY) \
+ P_REDUCTION_LOOP_BASE(BIT, BIT_INNER, USE_RD) \
+ P_REDUCTION_SUPARAMS(BIT_INNER) \
+ BODY \
+ P_REDUCTION_LOOP_END(BIT, IS_SAT)
+
+#define P_REDUCTION_CROSS_LOOP(BIT, BIT_INNER, USE_RD, IS_SAT, BODY) \
+ P_REDUCTION_LOOP_BASE(BIT, BIT_INNER, USE_RD) \
+ P_REDUCTION_CROSS_PARAMS(BIT_INNER) \
+ BODY \
+ P_REDUCTION_LOOP_END(BIT, IS_SAT)
+
+#define P_LOOP_END() \
+ } \
+ WRITE_RD(sext_xlen(rd_tmp));
+
+#define P_PAIR_LOOP_END() \
+ } \
+ if (xlen == 32) { \
+ WRITE_RD_PAIR(rd_tmp); \
+ } \
+ else { \
+ WRITE_RD(sext_xlen(rd_tmp)); \
+ }
+
+#define P_REDUCTION_LOOP_END(BIT, IS_SAT) \
+ } \
+ if (IS_SAT) { \
+ P_SAT(pd_res, BIT); \
+ } \
+ type_usew_t<BIT>::type pd = pd_res; \
+ WRITE_PD(); \
+ } \
+ WRITE_RD(sext_xlen(rd_tmp));
+
+#define P_REDUCTION_ULOOP_END(BIT, IS_SAT) \
+ } \
+ if (IS_SAT) { \
+ P_SATU(pd_res, BIT); \
+ } \
+ type_usew_t<BIT>::type pd = pd_res; \
+ WRITE_PD(); \
+ } \
+ WRITE_RD(sext_xlen(rd_tmp));
+
+#define P_SUNPKD8(X, Y) \
+ require_extension('P'); \
+ reg_t rd_tmp = 0; \
+ int16_t pd[4] = { \
+ P_SB(RS1, Y), \
+ P_SB(RS1, X), \
+ P_SB(RS1, Y + 4), \
+ P_SB(RS1, X + 4), \
+ }; \
+ if (xlen == 64) { \
+ memcpy(&rd_tmp, pd, 8); \
+ } else { \
+ memcpy(&rd_tmp, pd, 4); \
+ } \
+ WRITE_RD(sext_xlen(rd_tmp));
+
+#define P_ZUNPKD8(X, Y) \
+ require_extension('P'); \
+ reg_t rd_tmp = 0; \
+ uint16_t pd[4] = { \
+ P_B(RS1, Y), \
+ P_B(RS1, X), \
+ P_B(RS1, Y + 4), \
+ P_B(RS1, X + 4), \
+ }; \
+ if (xlen == 64) { \
+ memcpy(&rd_tmp, pd, 8); \
+ } else { \
+ memcpy(&rd_tmp, pd, 4); \
+ } \
+ WRITE_RD(sext_xlen(rd_tmp));
+
+// Pack instruction body. For every pair of BIT-wide lanes (2i, 2i + 1) of
+// the destination, the lower lane is taken from lane 2i + Y of rs2 and the
+// upper lane from lane 2i + X of rs1; X and Y are 0 or 1 and select the
+// bottom or top lane of the source pair.
+// The source registers are read once into rs1/rs2 and those locals are used
+// inside the loop, as the other P-extension loop macros do.
+#define P_PK(BIT, X, Y) \
+ require_extension('P'); \
+ require(BIT == e16 || BIT == e32); \
+ reg_t rd_tmp = 0, rs1 = RS1, rs2 = RS2; \
+ for (sreg_t i = 0; i < xlen / BIT / 2; i++) { \
+ rd_tmp = set_field(rd_tmp, make_mask64(i * 2 * BIT, BIT), \
+ P_UFIELD(rs2, i * 2 + Y, BIT)); \
+ rd_tmp = set_field(rd_tmp, make_mask64((i * 2 + 1) * BIT, BIT), \
+ P_UFIELD(rs1, i * 2 + X, BIT)); \
+ } \
+ WRITE_RD(sext_xlen(rd_tmp));
+
+#define P_64_PROFILE_BASE() \
+ require_extension('P'); \
+ sreg_t rd, rs1, rs2;
+
+#define P_64_UPROFILE_BASE() \
+ require_extension('P'); \
+ reg_t rd, rs1, rs2;
+
+#define P_64_PROFILE_PARAM(USE_RD, INPUT_PAIR) \
+ if (xlen == 32) { \
+ rs1 = INPUT_PAIR ? RS1_PAIR : RS1; \
+ rs2 = INPUT_PAIR ? RS2_PAIR : RS2; \
+ rd = USE_RD ? RD_PAIR : 0; \
+ } else { \
+ rs1 = RS1; \
+ rs2 = RS2; \
+ rd = USE_RD ? RD : 0; \
+ }
+
+// Body wrapper for signed 64-bit operations. Declares sreg_t rd, rs1, rs2,
+// loads them with P_64_PROFILE_PARAM(false, true) (rd starts at 0; on a
+// 32-bit xlen the sources are read as register pairs), runs BODY, then
+// writes rd back through P_64_PROFILE_END().
+// The last line deliberately has no line continuation, so the definition
+// ends here regardless of what follows it.
+#define P_64_PROFILE(BODY) \
+ P_64_PROFILE_BASE() \
+ P_64_PROFILE_PARAM(false, true) \
+ BODY \
+ P_64_PROFILE_END()
+
+// Body wrapper for unsigned 64-bit operations. Declares reg_t rd, rs1, rs2,
+// loads them with P_64_PROFILE_PARAM(false, true) (rd starts at 0; on a
+// 32-bit xlen the sources are read as register pairs), runs BODY, then
+// writes rd back through P_64_PROFILE_END().
+// The last line deliberately has no line continuation, so the definition
+// ends here regardless of what follows it.
+#define P_64_UPROFILE(BODY) \
+ P_64_UPROFILE_BASE() \
+ P_64_PROFILE_PARAM(false, true) \
+ BODY \
+ P_64_PROFILE_END()
+
+// Body wrapper for signed reductions into a 64-bit accumulator. Declares
+// sreg_t rd, rs1, rs2 and loads them with P_64_PROFILE_PARAM(true, false):
+// rd is read from the destination (as a register pair on a 32-bit xlen) and
+// the sources are read as single registers. BODY then runs once per
+// BIT-wide lane i, with ps1/ps2 holding the signed lanes of rs1/rs2, and rd
+// is written back through P_64_PROFILE_END().
+// The last line deliberately has no line continuation, so the definition
+// ends here regardless of what follows it.
+#define P_64_PROFILE_REDUCTION(BIT, BODY) \
+ P_64_PROFILE_BASE() \
+ P_64_PROFILE_PARAM(true, false) \
+ for (sreg_t i = 0; i < xlen / BIT; i++) { \
+ sreg_t ps1 = P_FIELD(rs1, i, BIT); \
+ sreg_t ps2 = P_FIELD(rs2, i, BIT); \
+ BODY \
+ } \
+ P_64_PROFILE_END()
+
+// Body wrapper for unsigned reductions into a 64-bit accumulator. Declares
+// reg_t rd, rs1, rs2 and loads them with P_64_PROFILE_PARAM(true, false):
+// rd is read from the destination (as a register pair on a 32-bit xlen) and
+// the sources are read as single registers. BODY then runs once per
+// BIT-wide lane i, with ps1/ps2 holding the unsigned lanes of rs1/rs2, and
+// rd is written back through P_64_PROFILE_END().
+// The last line deliberately has no line continuation, so the definition
+// ends here regardless of what follows it.
+#define P_64_UPROFILE_REDUCTION(BIT, BODY) \
+ P_64_UPROFILE_BASE() \
+ P_64_PROFILE_PARAM(true, false) \
+ for (sreg_t i = 0; i < xlen / BIT; i++) { \
+ reg_t ps1 = P_UFIELD(rs1, i, BIT); \
+ reg_t ps2 = P_UFIELD(rs2, i, BIT); \
+ BODY \
+ } \
+ P_64_PROFILE_END()
+
+#define P_64_PROFILE_END() \
+ if (xlen == 32) { \
+ WRITE_RD_PAIR(rd); \
+ } else { \
+ WRITE_RD(sext_xlen(rd)); \
+ }
+
#define DEBUG_START 0x0
#define DEBUG_END (0x1000 - 1)
diff --git a/riscv/encoding.h b/riscv/encoding.h
index 3c5a473..55088d6 100644
--- a/riscv/encoding.h
+++ b/riscv/encoding.h
@@ -2142,6 +2142,660 @@
#define MASK_VL4R_V 0xfff0707f
#define MATCH_VL8R_V 0x1e807007
#define MASK_VL8R_V 0xfff0707f
+#define MATCH_ADD8 0x48000077
+#define MASK_ADD8 0xfe00707f
+#define MATCH_ADD16 0x40000077
+#define MASK_ADD16 0xfe00707f
+#define MATCH_ADD64 0xc0001077
+#define MASK_ADD64 0xfe00707f
+#define MATCH_AVE 0xe0000077
+#define MASK_AVE 0xfe00707f
+#define MATCH_BITREV 0xe6000077
+#define MASK_BITREV 0xfe00707f
+#define MATCH_BITREVI 0xe8000077
+#define MASK_BITREVI 0xfc00707f
+#define MATCH_BPICK 0x3077
+#define MASK_BPICK 0x600707f
+#define MATCH_CLRS8 0xae000077
+#define MASK_CLRS8 0xfff0707f
+#define MATCH_CLRS16 0xae800077
+#define MASK_CLRS16 0xfff0707f
+#define MATCH_CLRS32 0xaf800077
+#define MASK_CLRS32 0xfff0707f
+#define MATCH_CLO8 0xae300077
+#define MASK_CLO8 0xfff0707f
+#define MATCH_CLO16 0xaeb00077
+#define MASK_CLO16 0xfff0707f
+#define MATCH_CLO32 0xafb00077
+#define MASK_CLO32 0xfff0707f
+#define MATCH_CLZ8 0xae100077
+#define MASK_CLZ8 0xfff0707f
+#define MATCH_CLZ16 0xae900077
+#define MASK_CLZ16 0xfff0707f
+#define MATCH_CLZ32 0xaf900077
+#define MASK_CLZ32 0xfff0707f
+#define MATCH_CMPEQ8 0x4e000077
+#define MASK_CMPEQ8 0xfe00707f
+#define MATCH_CMPEQ16 0x4c000077
+#define MASK_CMPEQ16 0xfe00707f
+#define MATCH_CRAS16 0x44000077
+#define MASK_CRAS16 0xfe00707f
+#define MATCH_CRSA16 0x46000077
+#define MASK_CRSA16 0xfe00707f
+#define MATCH_INSB 0xac000077
+#define MASK_INSB 0xff80707f
+#define MATCH_KABS8 0xad000077
+#define MASK_KABS8 0xfff0707f
+#define MATCH_KABS16 0xad100077
+#define MASK_KABS16 0xfff0707f
+#define MATCH_KABSW 0xad400077
+#define MASK_KABSW 0xfff0707f
+#define MATCH_KADD8 0x18000077
+#define MASK_KADD8 0xfe00707f
+#define MATCH_KADD16 0x10000077
+#define MASK_KADD16 0xfe00707f
+#define MATCH_KADD64 0x90001077
+#define MASK_KADD64 0xfe00707f
+#define MATCH_KADDH 0x4001077
+#define MASK_KADDH 0xfe00707f
+#define MATCH_KADDW 0x1077
+#define MASK_KADDW 0xfe00707f
+#define MATCH_KCRAS16 0x14000077
+#define MASK_KCRAS16 0xfe00707f
+#define MATCH_KCRSA16 0x16000077
+#define MASK_KCRSA16 0xfe00707f
+#define MATCH_KDMBB 0xa001077
+#define MASK_KDMBB 0xfe00707f
+#define MATCH_KDMBT 0x1a001077
+#define MASK_KDMBT 0xfe00707f
+#define MATCH_KDMTT 0x2a001077
+#define MASK_KDMTT 0xfe00707f
+#define MATCH_KDMABB 0xd2001077
+#define MASK_KDMABB 0xfe00707f
+#define MATCH_KDMABT 0xe2001077
+#define MASK_KDMABT 0xfe00707f
+#define MATCH_KDMATT 0xf2001077
+#define MASK_KDMATT 0xfe00707f
+#define MATCH_KHM8 0x8e000077
+#define MASK_KHM8 0xfe00707f
+#define MATCH_KHMX8 0x9e000077
+#define MASK_KHMX8 0xfe00707f
+#define MATCH_KHM16 0x86000077
+#define MASK_KHM16 0xfe00707f
+#define MATCH_KHMX16 0x96000077
+#define MASK_KHMX16 0xfe00707f
+#define MATCH_KHMBB 0xc001077
+#define MASK_KHMBB 0xfe00707f
+#define MATCH_KHMBT 0x1c001077
+#define MASK_KHMBT 0xfe00707f
+#define MATCH_KHMTT 0x2c001077
+#define MASK_KHMTT 0xfe00707f
+#define MATCH_KMABB 0x5a001077
+#define MASK_KMABB 0xfe00707f
+#define MATCH_KMABT 0x6a001077
+#define MASK_KMABT 0xfe00707f
+#define MATCH_KMATT 0x7a001077
+#define MASK_KMATT 0xfe00707f
+#define MATCH_KMADA 0x48001077
+#define MASK_KMADA 0xfe00707f
+#define MATCH_KMAXDA 0x4a001077
+#define MASK_KMAXDA 0xfe00707f
+#define MATCH_KMADS 0x5c001077
+#define MASK_KMADS 0xfe00707f
+#define MATCH_KMADRS 0x6c001077
+#define MASK_KMADRS 0xfe00707f
+#define MATCH_KMAXDS 0x7c001077
+#define MASK_KMAXDS 0xfe00707f
+#define MATCH_KMAR64 0x94001077
+#define MASK_KMAR64 0xfe00707f
+#define MATCH_KMDA 0x38001077
+#define MASK_KMDA 0xfe00707f
+#define MATCH_KMXDA 0x3a001077
+#define MASK_KMXDA 0xfe00707f
+#define MATCH_KMMAC 0x60001077
+#define MASK_KMMAC 0xfe00707f
+#define MATCH_KMMAC_U 0x70001077
+#define MASK_KMMAC_U 0xfe00707f
+#define MATCH_KMMAWB 0x46001077
+#define MASK_KMMAWB 0xfe00707f
+#define MATCH_KMMAWB_U 0x56001077
+#define MASK_KMMAWB_U 0xfe00707f
+#define MATCH_KMMAWB2 0xce001077
+#define MASK_KMMAWB2 0xfe00707f
+#define MATCH_KMMAWB2_U 0xde001077
+#define MASK_KMMAWB2_U 0xfe00707f
+#define MATCH_KMMAWT 0x66001077
+#define MASK_KMMAWT 0xfe00707f
+#define MATCH_KMMAWT_U 0x76001077
+#define MASK_KMMAWT_U 0xfe00707f
+#define MATCH_KMMAWT2 0xee001077
+#define MASK_KMMAWT2 0xfe00707f
+#define MATCH_KMMAWT2_U 0xfe001077
+#define MASK_KMMAWT2_U 0xfe00707f
+#define MATCH_KMMSB 0x42001077
+#define MASK_KMMSB 0xfe00707f
+#define MATCH_KMMSB_U 0x52001077
+#define MASK_KMMSB_U 0xfe00707f
+#define MATCH_KMMWB2 0x8e001077
+#define MASK_KMMWB2 0xfe00707f
+#define MATCH_KMMWB2_U 0x9e001077
+#define MASK_KMMWB2_U 0xfe00707f
+#define MATCH_KMMWT2 0xae001077
+#define MASK_KMMWT2 0xfe00707f
+#define MATCH_KMMWT2_U 0xbe001077
+#define MASK_KMMWT2_U 0xfe00707f
+#define MATCH_KMSDA 0x4c001077
+#define MASK_KMSDA 0xfe00707f
+#define MATCH_KMSXDA 0x4e001077
+#define MASK_KMSXDA 0xfe00707f
+#define MATCH_KMSR64 0x96001077
+#define MASK_KMSR64 0xfe00707f
+#define MATCH_KSLLW 0x26001077
+#define MASK_KSLLW 0xfe00707f
+#define MATCH_KSLLIW 0x36001077
+#define MASK_KSLLIW 0xfe00707f
+#define MATCH_KSLL8 0x6c000077
+#define MASK_KSLL8 0xfe00707f
+#define MATCH_KSLLI8 0x7c800077
+#define MASK_KSLLI8 0xff80707f
+#define MATCH_KSLL16 0x64000077
+#define MASK_KSLL16 0xfe00707f
+#define MATCH_KSLLI16 0x75000077
+#define MASK_KSLLI16 0xff00707f
+#define MATCH_KSLRA8 0x5e000077
+#define MASK_KSLRA8 0xfe00707f
+#define MATCH_KSLRA8_U 0x6e000077
+#define MASK_KSLRA8_U 0xfe00707f
+#define MATCH_KSLRA16 0x56000077
+#define MASK_KSLRA16 0xfe00707f
+#define MATCH_KSLRA16_U 0x66000077
+#define MASK_KSLRA16_U 0xfe00707f
+#define MATCH_KSLRAW 0x6e001077
+#define MASK_KSLRAW 0xfe00707f
+#define MATCH_KSLRAW_U 0x7e001077
+#define MASK_KSLRAW_U 0xfe00707f
+#define MATCH_KSTAS16 0xc4002077
+#define MASK_KSTAS16 0xfe00707f
+#define MATCH_KSTSA16 0xc6002077
+#define MASK_KSTSA16 0xfe00707f
+#define MATCH_KSUB8 0x1a000077
+#define MASK_KSUB8 0xfe00707f
+#define MATCH_KSUB16 0x12000077
+#define MASK_KSUB16 0xfe00707f
+#define MATCH_KSUB64 0x92001077
+#define MASK_KSUB64 0xfe00707f
+#define MATCH_KSUBH 0x6001077
+#define MASK_KSUBH 0xfe00707f
+#define MATCH_KSUBW 0x2001077
+#define MASK_KSUBW 0xfe00707f
+#define MATCH_KWMMUL 0x62001077
+#define MASK_KWMMUL 0xfe00707f
+#define MATCH_KWMMUL_U 0x72001077
+#define MASK_KWMMUL_U 0xfe00707f
+#define MATCH_MADDR32 0xc4001077
+#define MASK_MADDR32 0xfe00707f
+#define MATCH_MAXW 0xf2000077
+#define MASK_MAXW 0xfe00707f
+#define MATCH_MINW 0xf0000077
+#define MASK_MINW 0xfe00707f
+#define MATCH_MSUBR32 0xc6001077
+#define MASK_MSUBR32 0xfe00707f
+#define MATCH_MULR64 0xf0001077
+#define MASK_MULR64 0xfe00707f
+#define MATCH_MULSR64 0xe0001077
+#define MASK_MULSR64 0xfe00707f
+#define MATCH_PBSAD 0xfc000077
+#define MASK_PBSAD 0xfe00707f
+#define MATCH_PBSADA 0xfe000077
+#define MASK_PBSADA 0xfe00707f
+#define MATCH_PKBB16 0xe001077
+#define MASK_PKBB16 0xfe00707f
+#define MATCH_PKBT16 0x1e001077
+#define MASK_PKBT16 0xfe00707f
+#define MATCH_PKTT16 0x2e001077
+#define MASK_PKTT16 0xfe00707f
+#define MATCH_PKTB16 0x3e001077
+#define MASK_PKTB16 0xfe00707f
+#define MATCH_RADD8 0x8000077
+#define MASK_RADD8 0xfe00707f
+#define MATCH_RADD16 0x77
+#define MASK_RADD16 0xfe00707f
+#define MATCH_RADD64 0x80001077
+#define MASK_RADD64 0xfe00707f
+#define MATCH_RADDW 0x20001077
+#define MASK_RADDW 0xfe00707f
+#define MATCH_RCRAS16 0x4000077
+#define MASK_RCRAS16 0xfe00707f
+#define MATCH_RCRSA16 0x6000077
+#define MASK_RCRSA16 0xfe00707f
+#define MATCH_RSTAS16 0xb4002077
+#define MASK_RSTAS16 0xfe00707f
+#define MATCH_RSTSA16 0xb6002077
+#define MASK_RSTSA16 0xfe00707f
+#define MATCH_RSUB8 0xa000077
+#define MASK_RSUB8 0xfe00707f
+#define MATCH_RSUB16 0x2000077
+#define MASK_RSUB16 0xfe00707f
+#define MATCH_RSUB64 0x82001077
+#define MASK_RSUB64 0xfe00707f
+#define MATCH_RSUBW 0x22001077
+#define MASK_RSUBW 0xfe00707f
+#define MATCH_SCLIP8 0x8c000077
+#define MASK_SCLIP8 0xff80707f
+#define MATCH_SCLIP16 0x84000077
+#define MASK_SCLIP16 0xff00707f
+#define MATCH_SCLIP32 0xe4000077
+#define MASK_SCLIP32 0xfe00707f
+#define MATCH_SCMPLE8 0x1e000077
+#define MASK_SCMPLE8 0xfe00707f
+#define MATCH_SCMPLE16 0x1c000077
+#define MASK_SCMPLE16 0xfe00707f
+#define MATCH_SCMPLT8 0xe000077
+#define MASK_SCMPLT8 0xfe00707f
+#define MATCH_SCMPLT16 0xc000077
+#define MASK_SCMPLT16 0xfe00707f
+#define MATCH_SLL8 0x5c000077
+#define MASK_SLL8 0xfe00707f
+#define MATCH_SLLI8 0x7c000077
+#define MASK_SLLI8 0xff80707f
+#define MATCH_SLL16 0x54000077
+#define MASK_SLL16 0xfe00707f
+#define MATCH_SLLI16 0x74000077
+#define MASK_SLLI16 0xff00707f
+#define MATCH_SMAL 0x5e001077
+#define MASK_SMAL 0xfe00707f
+#define MATCH_SMALBB 0x88001077
+#define MASK_SMALBB 0xfe00707f
+#define MATCH_SMALBT 0x98001077
+#define MASK_SMALBT 0xfe00707f
+#define MATCH_SMALTT 0xa8001077
+#define MASK_SMALTT 0xfe00707f
+#define MATCH_SMALDA 0x8c001077
+#define MASK_SMALDA 0xfe00707f
+#define MATCH_SMALXDA 0x9c001077
+#define MASK_SMALXDA 0xfe00707f
+#define MATCH_SMALDS 0x8a001077
+#define MASK_SMALDS 0xfe00707f
+#define MATCH_SMALDRS 0x9a001077
+#define MASK_SMALDRS 0xfe00707f
+#define MATCH_SMALXDS 0xaa001077
+#define MASK_SMALXDS 0xfe00707f
+#define MATCH_SMAR64 0x84001077
+#define MASK_SMAR64 0xfe00707f
+#define MATCH_SMAQA 0xc8000077
+#define MASK_SMAQA 0xfe00707f
+#define MATCH_SMAQA_SU 0xca000077
+#define MASK_SMAQA_SU 0xfe00707f
+#define MATCH_SMAX8 0x8a000077
+#define MASK_SMAX8 0xfe00707f
+#define MATCH_SMAX16 0x82000077
+#define MASK_SMAX16 0xfe00707f
+#define MATCH_SMBB16 0x8001077
+#define MASK_SMBB16 0xfe00707f
+#define MATCH_SMBT16 0x18001077
+#define MASK_SMBT16 0xfe00707f
+#define MATCH_SMTT16 0x28001077
+#define MASK_SMTT16 0xfe00707f
+#define MATCH_SMDS 0x58001077
+#define MASK_SMDS 0xfe00707f
+#define MATCH_SMDRS 0x68001077
+#define MASK_SMDRS 0xfe00707f
+#define MATCH_SMXDS 0x78001077
+#define MASK_SMXDS 0xfe00707f
+#define MATCH_SMIN8 0x88000077
+#define MASK_SMIN8 0xfe00707f
+#define MATCH_SMIN16 0x80000077
+#define MASK_SMIN16 0xfe00707f
+#define MATCH_SMMUL 0x40001077
+#define MASK_SMMUL 0xfe00707f
+#define MATCH_SMMUL_U 0x50001077
+#define MASK_SMMUL_U 0xfe00707f
+#define MATCH_SMMWB 0x44001077
+#define MASK_SMMWB 0xfe00707f
+#define MATCH_SMMWB_U 0x54001077
+#define MASK_SMMWB_U 0xfe00707f
+#define MATCH_SMMWT 0x64001077
+#define MASK_SMMWT 0xfe00707f
+#define MATCH_SMMWT_U 0x74001077
+#define MASK_SMMWT_U 0xfe00707f
+#define MATCH_SMSLDA 0xac001077
+#define MASK_SMSLDA 0xfe00707f
+#define MATCH_SMSLXDA 0xbc001077
+#define MASK_SMSLXDA 0xfe00707f
+#define MATCH_SMSR64 0x86001077
+#define MASK_SMSR64 0xfe00707f
+#define MATCH_SMUL8 0xa8000077
+#define MASK_SMUL8 0xfe00707f
+#define MATCH_SMULX8 0xaa000077
+#define MASK_SMULX8 0xfe00707f
+#define MATCH_SMUL16 0xa0000077
+#define MASK_SMUL16 0xfe00707f
+#define MATCH_SMULX16 0xa2000077
+#define MASK_SMULX16 0xfe00707f
+#define MATCH_SRA_U 0x24001077
+#define MASK_SRA_U 0xfe00707f
+#define MATCH_SRAI_U 0xd4001077
+#define MASK_SRAI_U 0xfc00707f
+#define MATCH_SRA8 0x58000077
+#define MASK_SRA8 0xfe00707f
+#define MATCH_SRA8_U 0x68000077
+#define MASK_SRA8_U 0xfe00707f
+#define MATCH_SRAI8 0x78000077
+#define MASK_SRAI8 0xff80707f
+#define MATCH_SRAI8_U 0x78800077
+#define MASK_SRAI8_U 0xff80707f
+#define MATCH_SRA16 0x50000077
+#define MASK_SRA16 0xfe00707f
+#define MATCH_SRA16_U 0x60000077
+#define MASK_SRA16_U 0xfe00707f
+#define MATCH_SRAI16 0x70000077
+#define MASK_SRAI16 0xff00707f
+#define MATCH_SRAI16_U 0x71000077
+#define MASK_SRAI16_U 0xff00707f
+#define MATCH_SRL8 0x5a000077
+#define MASK_SRL8 0xfe00707f
+#define MATCH_SRL8_U 0x6a000077
+#define MASK_SRL8_U 0xfe00707f
+#define MATCH_SRLI8 0x7a000077
+#define MASK_SRLI8 0xff80707f
+#define MATCH_SRLI8_U 0x7a800077
+#define MASK_SRLI8_U 0xff80707f
+#define MATCH_SRL16 0x52000077
+#define MASK_SRL16 0xfe00707f
+#define MATCH_SRL16_U 0x62000077
+#define MASK_SRL16_U 0xfe00707f
+#define MATCH_SRLI16 0x72000077
+#define MASK_SRLI16 0xff00707f
+#define MATCH_SRLI16_U 0x73000077
+#define MASK_SRLI16_U 0xff00707f
+#define MATCH_STAS16 0xf4002077
+#define MASK_STAS16 0xfe00707f
+#define MATCH_STSA16 0xf6002077
+#define MASK_STSA16 0xfe00707f
+#define MATCH_SUB8 0x4a000077
+#define MASK_SUB8 0xfe00707f
+#define MATCH_SUB16 0x42000077
+#define MASK_SUB16 0xfe00707f
+#define MATCH_SUB64 0xc2001077
+#define MASK_SUB64 0xfe00707f
+#define MATCH_SUNPKD810 0xac800077
+#define MASK_SUNPKD810 0xfff0707f
+#define MATCH_SUNPKD820 0xac900077
+#define MASK_SUNPKD820 0xfff0707f
+#define MATCH_SUNPKD830 0xaca00077
+#define MASK_SUNPKD830 0xfff0707f
+#define MATCH_SUNPKD831 0xacb00077
+#define MASK_SUNPKD831 0xfff0707f
+#define MATCH_SUNPKD832 0xad300077
+#define MASK_SUNPKD832 0xfff0707f
+#define MATCH_SWAP8 0xad800077
+#define MASK_SWAP8 0xfff0707f
+#define MATCH_SWAP16 0xad900077
+#define MASK_SWAP16 0xfff0707f
+#define MATCH_UCLIP8 0x8d000077
+#define MASK_UCLIP8 0xff80707f
+#define MATCH_UCLIP16 0x85000077
+#define MASK_UCLIP16 0xff00707f
+#define MATCH_UCLIP32 0xf4000077
+#define MASK_UCLIP32 0xfe00707f
+#define MATCH_UCMPLE8 0x3e000077
+#define MASK_UCMPLE8 0xfe00707f
+#define MATCH_UCMPLE16 0x3c000077
+#define MASK_UCMPLE16 0xfe00707f
+#define MATCH_UCMPLT8 0x2e000077
+#define MASK_UCMPLT8 0xfe00707f
+#define MATCH_UCMPLT16 0x2c000077
+#define MASK_UCMPLT16 0xfe00707f
+#define MATCH_UKADD8 0x38000077
+#define MASK_UKADD8 0xfe00707f
+#define MATCH_UKADD16 0x30000077
+#define MASK_UKADD16 0xfe00707f
+#define MATCH_UKADD64 0xb0001077
+#define MASK_UKADD64 0xfe00707f
+#define MATCH_UKADDH 0x14001077
+#define MASK_UKADDH 0xfe00707f
+#define MATCH_UKADDW 0x10001077
+#define MASK_UKADDW 0xfe00707f
+#define MATCH_UKCRAS16 0x34000077
+#define MASK_UKCRAS16 0xfe00707f
+#define MATCH_UKCRSA16 0x36000077
+#define MASK_UKCRSA16 0xfe00707f
+#define MATCH_UKMAR64 0xb4001077
+#define MASK_UKMAR64 0xfe00707f
+#define MATCH_UKMSR64 0xb6001077
+#define MASK_UKMSR64 0xfe00707f
+#define MATCH_UKSTAS16 0xe4002077
+#define MASK_UKSTAS16 0xfe00707f
+#define MATCH_UKSTSA16 0xe6002077
+#define MASK_UKSTSA16 0xfe00707f
+#define MATCH_UKSUB8 0x3a000077
+#define MASK_UKSUB8 0xfe00707f
+#define MATCH_UKSUB16 0x32000077
+#define MASK_UKSUB16 0xfe00707f
+#define MATCH_UKSUB64 0xb2001077
+#define MASK_UKSUB64 0xfe00707f
+#define MATCH_UKSUBH 0x16001077
+#define MASK_UKSUBH 0xfe00707f
+#define MATCH_UKSUBW 0x12001077
+#define MASK_UKSUBW 0xfe00707f
+#define MATCH_UMAR64 0xa4001077
+#define MASK_UMAR64 0xfe00707f
+#define MATCH_UMAQA 0xcc000077
+#define MASK_UMAQA 0xfe00707f
+#define MATCH_UMAX8 0x9a000077
+#define MASK_UMAX8 0xfe00707f
+#define MATCH_UMAX16 0x92000077
+#define MASK_UMAX16 0xfe00707f
+#define MATCH_UMIN8 0x98000077
+#define MASK_UMIN8 0xfe00707f
+#define MATCH_UMIN16 0x90000077
+#define MASK_UMIN16 0xfe00707f
+#define MATCH_UMSR64 0xa6001077
+#define MASK_UMSR64 0xfe00707f
+#define MATCH_UMUL8 0xb8000077
+#define MASK_UMUL8 0xfe00707f
+#define MATCH_UMULX8 0xba000077
+#define MASK_UMULX8 0xfe00707f
+#define MATCH_UMUL16 0xb0000077
+#define MASK_UMUL16 0xfe00707f
+#define MATCH_UMULX16 0xb2000077
+#define MASK_UMULX16 0xfe00707f
+#define MATCH_URADD8 0x28000077
+#define MASK_URADD8 0xfe00707f
+#define MATCH_URADD16 0x20000077
+#define MASK_URADD16 0xfe00707f
+#define MATCH_URADD64 0xa0001077
+#define MASK_URADD64 0xfe00707f
+#define MATCH_URADDW 0x30001077
+#define MASK_URADDW 0xfe00707f
+#define MATCH_URCRAS16 0x24000077
+#define MASK_URCRAS16 0xfe00707f
+#define MATCH_URCRSA16 0x26000077
+#define MASK_URCRSA16 0xfe00707f
+#define MATCH_URSTAS16 0xd4002077
+#define MASK_URSTAS16 0xfe00707f
+#define MATCH_URSTSA16 0xd6002077
+#define MASK_URSTSA16 0xfe00707f
+#define MATCH_URSUB8 0x2a000077
+#define MASK_URSUB8 0xfe00707f
+#define MATCH_URSUB16 0x22000077
+#define MASK_URSUB16 0xfe00707f
+#define MATCH_URSUB64 0xa2001077
+#define MASK_URSUB64 0xfe00707f
+#define MATCH_URSUBW 0x32001077
+#define MASK_URSUBW 0xfe00707f
+#define MATCH_WEXTI 0xde000077
+#define MASK_WEXTI 0xfe00707f
+#define MATCH_WEXT 0xce000077
+#define MASK_WEXT 0xfe00707f
+#define MATCH_ZUNPKD810 0xacc00077
+#define MASK_ZUNPKD810 0xfff0707f
+#define MATCH_ZUNPKD820 0xacd00077
+#define MASK_ZUNPKD820 0xfff0707f
+#define MATCH_ZUNPKD830 0xace00077
+#define MASK_ZUNPKD830 0xfff0707f
+#define MATCH_ZUNPKD831 0xacf00077
+#define MASK_ZUNPKD831 0xfff0707f
+#define MATCH_ZUNPKD832 0xad700077
+#define MASK_ZUNPKD832 0xfff0707f
+#define MATCH_ADD32 0x40002077
+#define MASK_ADD32 0xfe00707f
+#define MATCH_CRAS32 0x44002077
+#define MASK_CRAS32 0xfe00707f
+#define MATCH_CRSA32 0x46002077
+#define MASK_CRSA32 0xfe00707f
+#define MATCH_KABS32 0xad200077
+#define MASK_KABS32 0xfff0707f
+#define MATCH_KADD32 0x10002077
+#define MASK_KADD32 0xfe00707f
+#define MATCH_KCRAS32 0x14002077
+#define MASK_KCRAS32 0xfe00707f
+#define MATCH_KCRSA32 0x16002077
+#define MASK_KCRSA32 0xfe00707f
+#define MATCH_KDMBB16 0xda001077
+#define MASK_KDMBB16 0xfe00707f
+#define MATCH_KDMBT16 0xea001077
+#define MASK_KDMBT16 0xfe00707f
+#define MATCH_KDMTT16 0xfa001077
+#define MASK_KDMTT16 0xfe00707f
+#define MATCH_KDMABB16 0xd8001077
+#define MASK_KDMABB16 0xfe00707f
+#define MATCH_KDMABT16 0xe8001077
+#define MASK_KDMABT16 0xfe00707f
+#define MATCH_KDMATT16 0xf8001077
+#define MASK_KDMATT16 0xfe00707f
+#define MATCH_KHMBB16 0xdc001077
+#define MASK_KHMBB16 0xfe00707f
+#define MATCH_KHMBT16 0xec001077
+#define MASK_KHMBT16 0xfe00707f
+#define MATCH_KHMTT16 0xfc001077
+#define MASK_KHMTT16 0xfe00707f
+#define MATCH_KMABB32 0x5a002077
+#define MASK_KMABB32 0xfe00707f
+#define MATCH_KMABT32 0x6a002077
+#define MASK_KMABT32 0xfe00707f
+#define MATCH_KMATT32 0x7a002077
+#define MASK_KMATT32 0xfe00707f
+#define MATCH_KMAXDA32 0x4a002077
+#define MASK_KMAXDA32 0xfe00707f
+#define MATCH_KMDA32 0x38002077
+#define MASK_KMDA32 0xfe00707f
+#define MATCH_KMXDA32 0x3a002077
+#define MASK_KMXDA32 0xfe00707f
+#define MATCH_KMADS32 0x5c002077
+#define MASK_KMADS32 0xfe00707f
+#define MATCH_KMADRS32 0x6c002077
+#define MASK_KMADRS32 0xfe00707f
+#define MATCH_KMAXDS32 0x7c002077
+#define MASK_KMAXDS32 0xfe00707f
+#define MATCH_KMSDA32 0x4c002077
+#define MASK_KMSDA32 0xfe00707f
+#define MATCH_KMSXDA32 0x4e002077
+#define MASK_KMSXDA32 0xfe00707f
+#define MATCH_KSLL32 0x64002077
+#define MASK_KSLL32 0xfe00707f
+#define MATCH_KSLLI32 0x84002077
+#define MASK_KSLLI32 0xfe00707f
+#define MATCH_KSLRA32 0x56002077
+#define MASK_KSLRA32 0xfe00707f
+#define MATCH_KSLRA32_U 0x66002077
+#define MASK_KSLRA32_U 0xfe00707f
+#define MATCH_KSTAS32 0xc0002077
+#define MASK_KSTAS32 0xfe00707f
+#define MATCH_KSTSA32 0xc2002077
+#define MASK_KSTSA32 0xfe00707f
+#define MATCH_KSUB32 0x12002077
+#define MASK_KSUB32 0xfe00707f
+#define MATCH_PKBB32 0xe002077
+#define MASK_PKBB32 0xfe00707f
+#define MATCH_PKBT32 0x1e002077
+#define MASK_PKBT32 0xfe00707f
+#define MATCH_PKTT32 0x2e002077
+#define MASK_PKTT32 0xfe00707f
+#define MATCH_PKTB32 0x3e002077
+#define MASK_PKTB32 0xfe00707f
+#define MATCH_RADD32 0x2077
+#define MASK_RADD32 0xfe00707f
+#define MATCH_RCRAS32 0x4002077
+#define MASK_RCRAS32 0xfe00707f
+#define MATCH_RCRSA32 0x6002077
+#define MASK_RCRSA32 0xfe00707f
+#define MATCH_RSTAS32 0xb0002077
+#define MASK_RSTAS32 0xfe00707f
+#define MATCH_RSTSA32 0xb2002077
+#define MASK_RSTSA32 0xfe00707f
+#define MATCH_RSUB32 0x2002077
+#define MASK_RSUB32 0xfe00707f
+#define MATCH_SLL32 0x54002077
+#define MASK_SLL32 0xfe00707f
+#define MATCH_SLLI32 0x74002077
+#define MASK_SLLI32 0xfe00707f
+#define MATCH_SMAX32 0x92002077
+#define MASK_SMAX32 0xfe00707f
+#define MATCH_SMBT32 0x18002077
+#define MASK_SMBT32 0xfe00707f
+#define MATCH_SMTT32 0x28002077
+#define MASK_SMTT32 0xfe00707f
+#define MATCH_SMDS32 0x58002077
+#define MASK_SMDS32 0xfe00707f
+#define MATCH_SMDRS32 0x68002077
+#define MASK_SMDRS32 0xfe00707f
+#define MATCH_SMXDS32 0x78002077
+#define MASK_SMXDS32 0xfe00707f
+#define MATCH_SMIN32 0x90002077
+#define MASK_SMIN32 0xfe00707f
+#define MATCH_SRA32 0x50002077
+#define MASK_SRA32 0xfe00707f
+#define MATCH_SRA32_U 0x60002077
+#define MASK_SRA32_U 0xfe00707f
+#define MATCH_SRAI32 0x70002077
+#define MASK_SRAI32 0xfe00707f
+#define MATCH_SRAI32_U 0x80002077
+#define MASK_SRAI32_U 0xfe00707f
+#define MATCH_SRAIW_U 0x34001077
+#define MASK_SRAIW_U 0xfe00707f
+#define MATCH_SRL32 0x52002077
+#define MASK_SRL32 0xfe00707f
+#define MATCH_SRL32_U 0x62002077
+#define MASK_SRL32_U 0xfe00707f
+#define MATCH_SRLI32 0x72002077
+#define MASK_SRLI32 0xfe00707f
+#define MATCH_SRLI32_U 0x82002077
+#define MASK_SRLI32_U 0xfe00707f
+#define MATCH_STAS32 0xf0002077
+#define MASK_STAS32 0xfe00707f
+#define MATCH_STSA32 0xf2002077
+#define MASK_STSA32 0xfe00707f
+#define MATCH_SUB32 0x42002077
+#define MASK_SUB32 0xfe00707f
+#define MATCH_UKADD32 0x30002077
+#define MASK_UKADD32 0xfe00707f
+#define MATCH_UKCRAS32 0x34002077
+#define MASK_UKCRAS32 0xfe00707f
+#define MATCH_UKCRSA32 0x36002077
+#define MASK_UKCRSA32 0xfe00707f
+#define MATCH_UKSTAS32 0xe0002077
+#define MASK_UKSTAS32 0xfe00707f
+#define MATCH_UKSTSA32 0xe2002077
+#define MASK_UKSTSA32 0xfe00707f
+#define MATCH_UKSUB32 0x32002077
+#define MASK_UKSUB32 0xfe00707f
+#define MATCH_UMAX32 0xa2002077
+#define MASK_UMAX32 0xfe00707f
+#define MATCH_UMIN32 0xa0002077
+#define MASK_UMIN32 0xfe00707f
+#define MATCH_URADD32 0x20002077
+#define MASK_URADD32 0xfe00707f
+#define MATCH_URCRAS32 0x24002077
+#define MASK_URCRAS32 0xfe00707f
+#define MATCH_URCRSA32 0x26002077
+#define MASK_URCRSA32 0xfe00707f
+#define MATCH_URSTAS32 0xd0002077
+#define MASK_URSTAS32 0xfe00707f
+#define MATCH_URSTSA32 0xd2002077
+#define MASK_URSTSA32 0xfe00707f
+#define MATCH_URSUB32 0x22002077
+#define MASK_URSUB32 0xfe00707f
#define CSR_FFLAGS 0x1
#define CSR_FRM 0x2
#define CSR_FCSR 0x3
@@ -3385,6 +4039,333 @@ DECLARE_INSN(vl1r_v, MATCH_VL1R_V, MASK_VL1R_V)
DECLARE_INSN(vl2r_v, MATCH_VL2R_V, MASK_VL2R_V)
DECLARE_INSN(vl4r_v, MATCH_VL4R_V, MASK_VL4R_V)
DECLARE_INSN(vl8r_v, MATCH_VL8R_V, MASK_VL8R_V)
+DECLARE_INSN(add8, MATCH_ADD8, MASK_ADD8)
+DECLARE_INSN(add16, MATCH_ADD16, MASK_ADD16)
+DECLARE_INSN(add64, MATCH_ADD64, MASK_ADD64)
+DECLARE_INSN(ave, MATCH_AVE, MASK_AVE)
+DECLARE_INSN(bitrev, MATCH_BITREV, MASK_BITREV)
+DECLARE_INSN(bitrevi, MATCH_BITREVI, MASK_BITREVI)
+DECLARE_INSN(bpick, MATCH_BPICK, MASK_BPICK)
+DECLARE_INSN(clrs8, MATCH_CLRS8, MASK_CLRS8)
+DECLARE_INSN(clrs16, MATCH_CLRS16, MASK_CLRS16)
+DECLARE_INSN(clrs32, MATCH_CLRS32, MASK_CLRS32)
+DECLARE_INSN(clo8, MATCH_CLO8, MASK_CLO8)
+DECLARE_INSN(clo16, MATCH_CLO16, MASK_CLO16)
+DECLARE_INSN(clo32, MATCH_CLO32, MASK_CLO32)
+DECLARE_INSN(clz8, MATCH_CLZ8, MASK_CLZ8)
+DECLARE_INSN(clz16, MATCH_CLZ16, MASK_CLZ16)
+DECLARE_INSN(clz32, MATCH_CLZ32, MASK_CLZ32)
+DECLARE_INSN(cmpeq8, MATCH_CMPEQ8, MASK_CMPEQ8)
+DECLARE_INSN(cmpeq16, MATCH_CMPEQ16, MASK_CMPEQ16)
+DECLARE_INSN(cras16, MATCH_CRAS16, MASK_CRAS16)
+DECLARE_INSN(crsa16, MATCH_CRSA16, MASK_CRSA16)
+DECLARE_INSN(insb, MATCH_INSB, MASK_INSB)
+DECLARE_INSN(kabs8, MATCH_KABS8, MASK_KABS8)
+DECLARE_INSN(kabs16, MATCH_KABS16, MASK_KABS16)
+DECLARE_INSN(kabsw, MATCH_KABSW, MASK_KABSW)
+DECLARE_INSN(kadd8, MATCH_KADD8, MASK_KADD8)
+DECLARE_INSN(kadd16, MATCH_KADD16, MASK_KADD16)
+DECLARE_INSN(kadd64, MATCH_KADD64, MASK_KADD64)
+DECLARE_INSN(kaddh, MATCH_KADDH, MASK_KADDH)
+DECLARE_INSN(kaddw, MATCH_KADDW, MASK_KADDW)
+DECLARE_INSN(kcras16, MATCH_KCRAS16, MASK_KCRAS16)
+DECLARE_INSN(kcrsa16, MATCH_KCRSA16, MASK_KCRSA16)
+DECLARE_INSN(kdmbb, MATCH_KDMBB, MASK_KDMBB)
+DECLARE_INSN(kdmbt, MATCH_KDMBT, MASK_KDMBT)
+DECLARE_INSN(kdmtt, MATCH_KDMTT, MASK_KDMTT)
+DECLARE_INSN(kdmabb, MATCH_KDMABB, MASK_KDMABB)
+DECLARE_INSN(kdmabt, MATCH_KDMABT, MASK_KDMABT)
+DECLARE_INSN(kdmatt, MATCH_KDMATT, MASK_KDMATT)
+DECLARE_INSN(khm8, MATCH_KHM8, MASK_KHM8)
+DECLARE_INSN(khmx8, MATCH_KHMX8, MASK_KHMX8)
+DECLARE_INSN(khm16, MATCH_KHM16, MASK_KHM16)
+DECLARE_INSN(khmx16, MATCH_KHMX16, MASK_KHMX16)
+DECLARE_INSN(khmbb, MATCH_KHMBB, MASK_KHMBB)
+DECLARE_INSN(khmbt, MATCH_KHMBT, MASK_KHMBT)
+DECLARE_INSN(khmtt, MATCH_KHMTT, MASK_KHMTT)
+DECLARE_INSN(kmabb, MATCH_KMABB, MASK_KMABB)
+DECLARE_INSN(kmabt, MATCH_KMABT, MASK_KMABT)
+DECLARE_INSN(kmatt, MATCH_KMATT, MASK_KMATT)
+DECLARE_INSN(kmada, MATCH_KMADA, MASK_KMADA)
+DECLARE_INSN(kmaxda, MATCH_KMAXDA, MASK_KMAXDA)
+DECLARE_INSN(kmads, MATCH_KMADS, MASK_KMADS)
+DECLARE_INSN(kmadrs, MATCH_KMADRS, MASK_KMADRS)
+DECLARE_INSN(kmaxds, MATCH_KMAXDS, MASK_KMAXDS)
+DECLARE_INSN(kmar64, MATCH_KMAR64, MASK_KMAR64)
+DECLARE_INSN(kmda, MATCH_KMDA, MASK_KMDA)
+DECLARE_INSN(kmxda, MATCH_KMXDA, MASK_KMXDA)
+DECLARE_INSN(kmmac, MATCH_KMMAC, MASK_KMMAC)
+DECLARE_INSN(kmmac_u, MATCH_KMMAC_U, MASK_KMMAC_U)
+DECLARE_INSN(kmmawb, MATCH_KMMAWB, MASK_KMMAWB)
+DECLARE_INSN(kmmawb_u, MATCH_KMMAWB_U, MASK_KMMAWB_U)
+DECLARE_INSN(kmmawb2, MATCH_KMMAWB2, MASK_KMMAWB2)
+DECLARE_INSN(kmmawb2_u, MATCH_KMMAWB2_U, MASK_KMMAWB2_U)
+DECLARE_INSN(kmmawt, MATCH_KMMAWT, MASK_KMMAWT)
+DECLARE_INSN(kmmawt_u, MATCH_KMMAWT_U, MASK_KMMAWT_U)
+DECLARE_INSN(kmmawt2, MATCH_KMMAWT2, MASK_KMMAWT2)
+DECLARE_INSN(kmmawt2_u, MATCH_KMMAWT2_U, MASK_KMMAWT2_U)
+DECLARE_INSN(kmmsb, MATCH_KMMSB, MASK_KMMSB)
+DECLARE_INSN(kmmsb_u, MATCH_KMMSB_U, MASK_KMMSB_U)
+DECLARE_INSN(kmmwb2, MATCH_KMMWB2, MASK_KMMWB2)
+DECLARE_INSN(kmmwb2_u, MATCH_KMMWB2_U, MASK_KMMWB2_U)
+DECLARE_INSN(kmmwt2, MATCH_KMMWT2, MASK_KMMWT2)
+DECLARE_INSN(kmmwt2_u, MATCH_KMMWT2_U, MASK_KMMWT2_U)
+DECLARE_INSN(kmsda, MATCH_KMSDA, MASK_KMSDA)
+DECLARE_INSN(kmsxda, MATCH_KMSXDA, MASK_KMSXDA)
+DECLARE_INSN(kmsr64, MATCH_KMSR64, MASK_KMSR64)
+DECLARE_INSN(ksllw, MATCH_KSLLW, MASK_KSLLW)
+DECLARE_INSN(kslliw, MATCH_KSLLIW, MASK_KSLLIW)
+DECLARE_INSN(ksll8, MATCH_KSLL8, MASK_KSLL8)
+DECLARE_INSN(kslli8, MATCH_KSLLI8, MASK_KSLLI8)
+DECLARE_INSN(ksll16, MATCH_KSLL16, MASK_KSLL16)
+DECLARE_INSN(kslli16, MATCH_KSLLI16, MASK_KSLLI16)
+DECLARE_INSN(kslra8, MATCH_KSLRA8, MASK_KSLRA8)
+DECLARE_INSN(kslra8_u, MATCH_KSLRA8_U, MASK_KSLRA8_U)
+DECLARE_INSN(kslra16, MATCH_KSLRA16, MASK_KSLRA16)
+DECLARE_INSN(kslra16_u, MATCH_KSLRA16_U, MASK_KSLRA16_U)
+DECLARE_INSN(kslraw, MATCH_KSLRAW, MASK_KSLRAW)
+DECLARE_INSN(kslraw_u, MATCH_KSLRAW_U, MASK_KSLRAW_U)
+DECLARE_INSN(kstas16, MATCH_KSTAS16, MASK_KSTAS16)
+DECLARE_INSN(kstsa16, MATCH_KSTSA16, MASK_KSTSA16)
+DECLARE_INSN(ksub8, MATCH_KSUB8, MASK_KSUB8)
+DECLARE_INSN(ksub16, MATCH_KSUB16, MASK_KSUB16)
+DECLARE_INSN(ksub64, MATCH_KSUB64, MASK_KSUB64)
+DECLARE_INSN(ksubh, MATCH_KSUBH, MASK_KSUBH)
+DECLARE_INSN(ksubw, MATCH_KSUBW, MASK_KSUBW)
+DECLARE_INSN(kwmmul, MATCH_KWMMUL, MASK_KWMMUL)
+DECLARE_INSN(kwmmul_u, MATCH_KWMMUL_U, MASK_KWMMUL_U)
+DECLARE_INSN(maddr32, MATCH_MADDR32, MASK_MADDR32)
+DECLARE_INSN(maxw, MATCH_MAXW, MASK_MAXW)
+DECLARE_INSN(minw, MATCH_MINW, MASK_MINW)
+DECLARE_INSN(msubr32, MATCH_MSUBR32, MASK_MSUBR32)
+DECLARE_INSN(mulr64, MATCH_MULR64, MASK_MULR64)
+DECLARE_INSN(mulsr64, MATCH_MULSR64, MASK_MULSR64)
+DECLARE_INSN(pbsad, MATCH_PBSAD, MASK_PBSAD)
+DECLARE_INSN(pbsada, MATCH_PBSADA, MASK_PBSADA)
+DECLARE_INSN(pkbb16, MATCH_PKBB16, MASK_PKBB16)
+DECLARE_INSN(pkbt16, MATCH_PKBT16, MASK_PKBT16)
+DECLARE_INSN(pktt16, MATCH_PKTT16, MASK_PKTT16)
+DECLARE_INSN(pktb16, MATCH_PKTB16, MASK_PKTB16)
+DECLARE_INSN(radd8, MATCH_RADD8, MASK_RADD8)
+DECLARE_INSN(radd16, MATCH_RADD16, MASK_RADD16)
+DECLARE_INSN(radd64, MATCH_RADD64, MASK_RADD64)
+DECLARE_INSN(raddw, MATCH_RADDW, MASK_RADDW)
+DECLARE_INSN(rcras16, MATCH_RCRAS16, MASK_RCRAS16)
+DECLARE_INSN(rcrsa16, MATCH_RCRSA16, MASK_RCRSA16)
+DECLARE_INSN(rstas16, MATCH_RSTAS16, MASK_RSTAS16)
+DECLARE_INSN(rstsa16, MATCH_RSTSA16, MASK_RSTSA16)
+DECLARE_INSN(rsub8, MATCH_RSUB8, MASK_RSUB8)
+DECLARE_INSN(rsub16, MATCH_RSUB16, MASK_RSUB16)
+DECLARE_INSN(rsub64, MATCH_RSUB64, MASK_RSUB64)
+DECLARE_INSN(rsubw, MATCH_RSUBW, MASK_RSUBW)
+DECLARE_INSN(sclip8, MATCH_SCLIP8, MASK_SCLIP8)
+DECLARE_INSN(sclip16, MATCH_SCLIP16, MASK_SCLIP16)
+DECLARE_INSN(sclip32, MATCH_SCLIP32, MASK_SCLIP32)
+DECLARE_INSN(scmple8, MATCH_SCMPLE8, MASK_SCMPLE8)
+DECLARE_INSN(scmple16, MATCH_SCMPLE16, MASK_SCMPLE16)
+DECLARE_INSN(scmplt8, MATCH_SCMPLT8, MASK_SCMPLT8)
+DECLARE_INSN(scmplt16, MATCH_SCMPLT16, MASK_SCMPLT16)
+DECLARE_INSN(sll8, MATCH_SLL8, MASK_SLL8)
+DECLARE_INSN(slli8, MATCH_SLLI8, MASK_SLLI8)
+DECLARE_INSN(sll16, MATCH_SLL16, MASK_SLL16)
+DECLARE_INSN(slli16, MATCH_SLLI16, MASK_SLLI16)
+DECLARE_INSN(smal, MATCH_SMAL, MASK_SMAL)
+DECLARE_INSN(smalbb, MATCH_SMALBB, MASK_SMALBB)
+DECLARE_INSN(smalbt, MATCH_SMALBT, MASK_SMALBT)
+DECLARE_INSN(smaltt, MATCH_SMALTT, MASK_SMALTT)
+DECLARE_INSN(smalda, MATCH_SMALDA, MASK_SMALDA)
+DECLARE_INSN(smalxda, MATCH_SMALXDA, MASK_SMALXDA)
+DECLARE_INSN(smalds, MATCH_SMALDS, MASK_SMALDS)
+DECLARE_INSN(smaldrs, MATCH_SMALDRS, MASK_SMALDRS)
+DECLARE_INSN(smalxds, MATCH_SMALXDS, MASK_SMALXDS)
+DECLARE_INSN(smar64, MATCH_SMAR64, MASK_SMAR64)
+DECLARE_INSN(smaqa, MATCH_SMAQA, MASK_SMAQA)
+DECLARE_INSN(smaqa_su, MATCH_SMAQA_SU, MASK_SMAQA_SU)
+DECLARE_INSN(smax8, MATCH_SMAX8, MASK_SMAX8)
+DECLARE_INSN(smax16, MATCH_SMAX16, MASK_SMAX16)
+DECLARE_INSN(smbb16, MATCH_SMBB16, MASK_SMBB16)
+DECLARE_INSN(smbt16, MATCH_SMBT16, MASK_SMBT16)
+DECLARE_INSN(smtt16, MATCH_SMTT16, MASK_SMTT16)
+DECLARE_INSN(smds, MATCH_SMDS, MASK_SMDS)
+DECLARE_INSN(smdrs, MATCH_SMDRS, MASK_SMDRS)
+DECLARE_INSN(smxds, MATCH_SMXDS, MASK_SMXDS)
+DECLARE_INSN(smin8, MATCH_SMIN8, MASK_SMIN8)
+DECLARE_INSN(smin16, MATCH_SMIN16, MASK_SMIN16)
+DECLARE_INSN(smmul, MATCH_SMMUL, MASK_SMMUL)
+DECLARE_INSN(smmul_u, MATCH_SMMUL_U, MASK_SMMUL_U)
+DECLARE_INSN(smmwb, MATCH_SMMWB, MASK_SMMWB)
+DECLARE_INSN(smmwb_u, MATCH_SMMWB_U, MASK_SMMWB_U)
+DECLARE_INSN(smmwt, MATCH_SMMWT, MASK_SMMWT)
+DECLARE_INSN(smmwt_u, MATCH_SMMWT_U, MASK_SMMWT_U)
+DECLARE_INSN(smslda, MATCH_SMSLDA, MASK_SMSLDA)
+DECLARE_INSN(smslxda, MATCH_SMSLXDA, MASK_SMSLXDA)
+DECLARE_INSN(smsr64, MATCH_SMSR64, MASK_SMSR64)
+DECLARE_INSN(smul8, MATCH_SMUL8, MASK_SMUL8)
+DECLARE_INSN(smulx8, MATCH_SMULX8, MASK_SMULX8)
+DECLARE_INSN(smul16, MATCH_SMUL16, MASK_SMUL16)
+DECLARE_INSN(smulx16, MATCH_SMULX16, MASK_SMULX16)
+DECLARE_INSN(sra_u, MATCH_SRA_U, MASK_SRA_U)
+DECLARE_INSN(srai_u, MATCH_SRAI_U, MASK_SRAI_U)
+DECLARE_INSN(sra8, MATCH_SRA8, MASK_SRA8)
+DECLARE_INSN(sra8_u, MATCH_SRA8_U, MASK_SRA8_U)
+DECLARE_INSN(srai8, MATCH_SRAI8, MASK_SRAI8)
+DECLARE_INSN(srai8_u, MATCH_SRAI8_U, MASK_SRAI8_U)
+DECLARE_INSN(sra16, MATCH_SRA16, MASK_SRA16)
+DECLARE_INSN(sra16_u, MATCH_SRA16_U, MASK_SRA16_U)
+DECLARE_INSN(srai16, MATCH_SRAI16, MASK_SRAI16)
+DECLARE_INSN(srai16_u, MATCH_SRAI16_U, MASK_SRAI16_U)
+DECLARE_INSN(srl8, MATCH_SRL8, MASK_SRL8)
+DECLARE_INSN(srl8_u, MATCH_SRL8_U, MASK_SRL8_U)
+DECLARE_INSN(srli8, MATCH_SRLI8, MASK_SRLI8)
+DECLARE_INSN(srli8_u, MATCH_SRLI8_U, MASK_SRLI8_U)
+DECLARE_INSN(srl16, MATCH_SRL16, MASK_SRL16)
+DECLARE_INSN(srl16_u, MATCH_SRL16_U, MASK_SRL16_U)
+DECLARE_INSN(srli16, MATCH_SRLI16, MASK_SRLI16)
+DECLARE_INSN(srli16_u, MATCH_SRLI16_U, MASK_SRLI16_U)
+DECLARE_INSN(stas16, MATCH_STAS16, MASK_STAS16)
+DECLARE_INSN(stsa16, MATCH_STSA16, MASK_STSA16)
+DECLARE_INSN(sub8, MATCH_SUB8, MASK_SUB8)
+DECLARE_INSN(sub16, MATCH_SUB16, MASK_SUB16)
+DECLARE_INSN(sub64, MATCH_SUB64, MASK_SUB64)
+DECLARE_INSN(sunpkd810, MATCH_SUNPKD810, MASK_SUNPKD810)
+DECLARE_INSN(sunpkd820, MATCH_SUNPKD820, MASK_SUNPKD820)
+DECLARE_INSN(sunpkd830, MATCH_SUNPKD830, MASK_SUNPKD830)
+DECLARE_INSN(sunpkd831, MATCH_SUNPKD831, MASK_SUNPKD831)
+DECLARE_INSN(sunpkd832, MATCH_SUNPKD832, MASK_SUNPKD832)
+DECLARE_INSN(swap8, MATCH_SWAP8, MASK_SWAP8)
+DECLARE_INSN(swap16, MATCH_SWAP16, MASK_SWAP16)
+DECLARE_INSN(uclip8, MATCH_UCLIP8, MASK_UCLIP8)
+DECLARE_INSN(uclip16, MATCH_UCLIP16, MASK_UCLIP16)
+DECLARE_INSN(uclip32, MATCH_UCLIP32, MASK_UCLIP32)
+DECLARE_INSN(ucmple8, MATCH_UCMPLE8, MASK_UCMPLE8)
+DECLARE_INSN(ucmple16, MATCH_UCMPLE16, MASK_UCMPLE16)
+DECLARE_INSN(ucmplt8, MATCH_UCMPLT8, MASK_UCMPLT8)
+DECLARE_INSN(ucmplt16, MATCH_UCMPLT16, MASK_UCMPLT16)
+DECLARE_INSN(ukadd8, MATCH_UKADD8, MASK_UKADD8)
+DECLARE_INSN(ukadd16, MATCH_UKADD16, MASK_UKADD16)
+DECLARE_INSN(ukadd64, MATCH_UKADD64, MASK_UKADD64)
+DECLARE_INSN(ukaddh, MATCH_UKADDH, MASK_UKADDH)
+DECLARE_INSN(ukaddw, MATCH_UKADDW, MASK_UKADDW)
+DECLARE_INSN(ukcras16, MATCH_UKCRAS16, MASK_UKCRAS16)
+DECLARE_INSN(ukcrsa16, MATCH_UKCRSA16, MASK_UKCRSA16)
+DECLARE_INSN(ukmar64, MATCH_UKMAR64, MASK_UKMAR64)
+DECLARE_INSN(ukmsr64, MATCH_UKMSR64, MASK_UKMSR64)
+DECLARE_INSN(ukstas16, MATCH_UKSTAS16, MASK_UKSTAS16)
+DECLARE_INSN(ukstsa16, MATCH_UKSTSA16, MASK_UKSTSA16)
+DECLARE_INSN(uksub8, MATCH_UKSUB8, MASK_UKSUB8)
+DECLARE_INSN(uksub16, MATCH_UKSUB16, MASK_UKSUB16)
+DECLARE_INSN(uksub64, MATCH_UKSUB64, MASK_UKSUB64)
+DECLARE_INSN(uksubh, MATCH_UKSUBH, MASK_UKSUBH)
+DECLARE_INSN(uksubw, MATCH_UKSUBW, MASK_UKSUBW)
+DECLARE_INSN(umar64, MATCH_UMAR64, MASK_UMAR64)
+DECLARE_INSN(umaqa, MATCH_UMAQA, MASK_UMAQA)
+DECLARE_INSN(umax8, MATCH_UMAX8, MASK_UMAX8)
+DECLARE_INSN(umax16, MATCH_UMAX16, MASK_UMAX16)
+DECLARE_INSN(umin8, MATCH_UMIN8, MASK_UMIN8)
+DECLARE_INSN(umin16, MATCH_UMIN16, MASK_UMIN16)
+DECLARE_INSN(umsr64, MATCH_UMSR64, MASK_UMSR64)
+DECLARE_INSN(umul8, MATCH_UMUL8, MASK_UMUL8)
+DECLARE_INSN(umulx8, MATCH_UMULX8, MASK_UMULX8)
+DECLARE_INSN(umul16, MATCH_UMUL16, MASK_UMUL16)
+DECLARE_INSN(umulx16, MATCH_UMULX16, MASK_UMULX16)
+DECLARE_INSN(uradd8, MATCH_URADD8, MASK_URADD8)
+DECLARE_INSN(uradd16, MATCH_URADD16, MASK_URADD16)
+DECLARE_INSN(uradd64, MATCH_URADD64, MASK_URADD64)
+DECLARE_INSN(uraddw, MATCH_URADDW, MASK_URADDW)
+DECLARE_INSN(urcras16, MATCH_URCRAS16, MASK_URCRAS16)
+DECLARE_INSN(urcrsa16, MATCH_URCRSA16, MASK_URCRSA16)
+DECLARE_INSN(urstas16, MATCH_URSTAS16, MASK_URSTAS16)
+DECLARE_INSN(urstsa16, MATCH_URSTSA16, MASK_URSTSA16)
+DECLARE_INSN(ursub8, MATCH_URSUB8, MASK_URSUB8)
+DECLARE_INSN(ursub16, MATCH_URSUB16, MASK_URSUB16)
+DECLARE_INSN(ursub64, MATCH_URSUB64, MASK_URSUB64)
+DECLARE_INSN(ursubw, MATCH_URSUBW, MASK_URSUBW)
+DECLARE_INSN(wexti, MATCH_WEXTI, MASK_WEXTI)
+DECLARE_INSN(wext, MATCH_WEXT, MASK_WEXT)
+DECLARE_INSN(zunpkd810, MATCH_ZUNPKD810, MASK_ZUNPKD810)
+DECLARE_INSN(zunpkd820, MATCH_ZUNPKD820, MASK_ZUNPKD820)
+DECLARE_INSN(zunpkd830, MATCH_ZUNPKD830, MASK_ZUNPKD830)
+DECLARE_INSN(zunpkd831, MATCH_ZUNPKD831, MASK_ZUNPKD831)
+DECLARE_INSN(zunpkd832, MATCH_ZUNPKD832, MASK_ZUNPKD832)
+DECLARE_INSN(add32, MATCH_ADD32, MASK_ADD32)
+DECLARE_INSN(cras32, MATCH_CRAS32, MASK_CRAS32)
+DECLARE_INSN(crsa32, MATCH_CRSA32, MASK_CRSA32)
+DECLARE_INSN(kabs32, MATCH_KABS32, MASK_KABS32)
+DECLARE_INSN(kadd32, MATCH_KADD32, MASK_KADD32)
+DECLARE_INSN(kcras32, MATCH_KCRAS32, MASK_KCRAS32)
+DECLARE_INSN(kcrsa32, MATCH_KCRSA32, MASK_KCRSA32)
+DECLARE_INSN(kdmbb16, MATCH_KDMBB16, MASK_KDMBB16)
+DECLARE_INSN(kdmbt16, MATCH_KDMBT16, MASK_KDMBT16)
+DECLARE_INSN(kdmtt16, MATCH_KDMTT16, MASK_KDMTT16)
+DECLARE_INSN(kdmabb16, MATCH_KDMABB16, MASK_KDMABB16)
+DECLARE_INSN(kdmabt16, MATCH_KDMABT16, MASK_KDMABT16)
+DECLARE_INSN(kdmatt16, MATCH_KDMATT16, MASK_KDMATT16)
+DECLARE_INSN(khmbb16, MATCH_KHMBB16, MASK_KHMBB16)
+DECLARE_INSN(khmbt16, MATCH_KHMBT16, MASK_KHMBT16)
+DECLARE_INSN(khmtt16, MATCH_KHMTT16, MASK_KHMTT16)
+DECLARE_INSN(kmabb32, MATCH_KMABB32, MASK_KMABB32)
+DECLARE_INSN(kmabt32, MATCH_KMABT32, MASK_KMABT32)
+DECLARE_INSN(kmatt32, MATCH_KMATT32, MASK_KMATT32)
+DECLARE_INSN(kmaxda32, MATCH_KMAXDA32, MASK_KMAXDA32)
+DECLARE_INSN(kmda32, MATCH_KMDA32, MASK_KMDA32)
+DECLARE_INSN(kmxda32, MATCH_KMXDA32, MASK_KMXDA32)
+DECLARE_INSN(kmads32, MATCH_KMADS32, MASK_KMADS32)
+DECLARE_INSN(kmadrs32, MATCH_KMADRS32, MASK_KMADRS32)
+DECLARE_INSN(kmaxds32, MATCH_KMAXDS32, MASK_KMAXDS32)
+DECLARE_INSN(kmsda32, MATCH_KMSDA32, MASK_KMSDA32)
+DECLARE_INSN(kmsxda32, MATCH_KMSXDA32, MASK_KMSXDA32)
+DECLARE_INSN(ksll32, MATCH_KSLL32, MASK_KSLL32)
+DECLARE_INSN(kslli32, MATCH_KSLLI32, MASK_KSLLI32)
+DECLARE_INSN(kslra32, MATCH_KSLRA32, MASK_KSLRA32)
+DECLARE_INSN(kslra32_u, MATCH_KSLRA32_U, MASK_KSLRA32_U)
+DECLARE_INSN(kstas32, MATCH_KSTAS32, MASK_KSTAS32)
+DECLARE_INSN(kstsa32, MATCH_KSTSA32, MASK_KSTSA32)
+DECLARE_INSN(ksub32, MATCH_KSUB32, MASK_KSUB32)
+DECLARE_INSN(pkbb32, MATCH_PKBB32, MASK_PKBB32)
+DECLARE_INSN(pkbt32, MATCH_PKBT32, MASK_PKBT32)
+DECLARE_INSN(pktt32, MATCH_PKTT32, MASK_PKTT32)
+DECLARE_INSN(pktb32, MATCH_PKTB32, MASK_PKTB32)
+DECLARE_INSN(radd32, MATCH_RADD32, MASK_RADD32)
+DECLARE_INSN(rcras32, MATCH_RCRAS32, MASK_RCRAS32)
+DECLARE_INSN(rcrsa32, MATCH_RCRSA32, MASK_RCRSA32)
+DECLARE_INSN(rstas32, MATCH_RSTAS32, MASK_RSTAS32)
+DECLARE_INSN(rstsa32, MATCH_RSTSA32, MASK_RSTSA32)
+DECLARE_INSN(rsub32, MATCH_RSUB32, MASK_RSUB32)
+DECLARE_INSN(sll32, MATCH_SLL32, MASK_SLL32)
+DECLARE_INSN(slli32, MATCH_SLLI32, MASK_SLLI32)
+DECLARE_INSN(smax32, MATCH_SMAX32, MASK_SMAX32)
+DECLARE_INSN(smbt32, MATCH_SMBT32, MASK_SMBT32)
+DECLARE_INSN(smtt32, MATCH_SMTT32, MASK_SMTT32)
+DECLARE_INSN(smds32, MATCH_SMDS32, MASK_SMDS32)
+DECLARE_INSN(smdrs32, MATCH_SMDRS32, MASK_SMDRS32)
+DECLARE_INSN(smxds32, MATCH_SMXDS32, MASK_SMXDS32)
+DECLARE_INSN(smin32, MATCH_SMIN32, MASK_SMIN32)
+DECLARE_INSN(sra32, MATCH_SRA32, MASK_SRA32)
+DECLARE_INSN(sra32_u, MATCH_SRA32_U, MASK_SRA32_U)
+DECLARE_INSN(srai32, MATCH_SRAI32, MASK_SRAI32)
+DECLARE_INSN(srai32_u, MATCH_SRAI32_U, MASK_SRAI32_U)
+DECLARE_INSN(sraiw_u, MATCH_SRAIW_U, MASK_SRAIW_U)
+DECLARE_INSN(srl32, MATCH_SRL32, MASK_SRL32)
+DECLARE_INSN(srl32_u, MATCH_SRL32_U, MASK_SRL32_U)
+DECLARE_INSN(srli32, MATCH_SRLI32, MASK_SRLI32)
+DECLARE_INSN(srli32_u, MATCH_SRLI32_U, MASK_SRLI32_U)
+DECLARE_INSN(stas32, MATCH_STAS32, MASK_STAS32)
+DECLARE_INSN(stsa32, MATCH_STSA32, MASK_STSA32)
+DECLARE_INSN(sub32, MATCH_SUB32, MASK_SUB32)
+DECLARE_INSN(ukadd32, MATCH_UKADD32, MASK_UKADD32)
+DECLARE_INSN(ukcras32, MATCH_UKCRAS32, MASK_UKCRAS32)
+DECLARE_INSN(ukcrsa32, MATCH_UKCRSA32, MASK_UKCRSA32)
+DECLARE_INSN(ukstas32, MATCH_UKSTAS32, MASK_UKSTAS32)
+DECLARE_INSN(ukstsa32, MATCH_UKSTSA32, MASK_UKSTSA32)
+DECLARE_INSN(uksub32, MATCH_UKSUB32, MASK_UKSUB32)
+DECLARE_INSN(umax32, MATCH_UMAX32, MASK_UMAX32)
+DECLARE_INSN(umin32, MATCH_UMIN32, MASK_UMIN32)
+DECLARE_INSN(uradd32, MATCH_URADD32, MASK_URADD32)
+DECLARE_INSN(urcras32, MATCH_URCRAS32, MASK_URCRAS32)
+DECLARE_INSN(urcrsa32, MATCH_URCRSA32, MASK_URCRSA32)
+DECLARE_INSN(urstas32, MATCH_URSTAS32, MASK_URSTAS32)
+DECLARE_INSN(urstsa32, MATCH_URSTSA32, MASK_URSTSA32)
+DECLARE_INSN(ursub32, MATCH_URSUB32, MASK_URSUB32)
#endif
#ifdef DECLARE_CSR
DECLARE_CSR(fflags, CSR_FFLAGS)
diff --git a/riscv/insns/add16.h b/riscv/insns/add16.h
new file mode 100644
index 0000000..53b6be8
--- /dev/null
+++ b/riscv/insns/add16.h
@@ -0,0 +1,3 @@
+P_LOOP(16, { // P_LOOP is defined outside this hunk; presumably runs the body once per 16-bit element with ps1/ps2/pd bound to that element -- confirm
+ pd = ps1 + ps2; // destination element = sum of the two source elements (no saturation applied here)
+}) \ No newline at end of file
diff --git a/riscv/insns/add32.h b/riscv/insns/add32.h
new file mode 100644
index 0000000..7fe7c96
--- /dev/null
+++ b/riscv/insns/add32.h
@@ -0,0 +1,4 @@
+require_rv64; // presumably rejects the instruction unless running as RV64 -- confirm against the macro
+P_LOOP(32, { // P_LOOP is defined outside this hunk; presumably runs the body once per 32-bit element -- confirm
+ pd = (int64_t)ps1 + ps2; // widen first (as crsa32.h does) so a 32-bit element sum cannot overflow int; the store to pd keeps the low bits
+}) \ No newline at end of file
diff --git a/riscv/insns/add64.h b/riscv/insns/add64.h
new file mode 100644
index 0000000..eb076b2
--- /dev/null
+++ b/riscv/insns/add64.h
@@ -0,0 +1,3 @@
+P_64_PROFILE({ // P_64_PROFILE is defined outside this hunk; presumably binds rs1/rs2/rd as 64-bit operands and writes rd back -- confirm
+ rd = rs1 + rs2; // plain 64-bit addition, no saturation
+}) \ No newline at end of file
diff --git a/riscv/insns/add8.h b/riscv/insns/add8.h
new file mode 100644
index 0000000..e5e1cb0
--- /dev/null
+++ b/riscv/insns/add8.h
@@ -0,0 +1,3 @@
+P_LOOP(8, { // P_LOOP is defined outside this hunk; presumably runs the body once per 8-bit element with ps1/ps2/pd bound to that element -- confirm
+ pd = ps1 + ps2; // destination element = sum of the two source elements (no saturation applied here)
+}) \ No newline at end of file
diff --git a/riscv/insns/ave.h b/riscv/insns/ave.h
new file mode 100644
index 0000000..15bf863
--- /dev/null
+++ b/riscv/insns/ave.h
@@ -0,0 +1,5 @@
+require_extension('P'); // presumably rejects the instruction when the P extension is not enabled -- confirm
+sreg_t rs1 = RS1; // signed copies, so the >> 1 below is applied to signed values
+sreg_t rs2 = RS2;
+sreg_t carry = (rs1 & 1) | (rs2 & 1); // 1 if either operand is odd; with the halved operands below this equals (rs1 + rs2 + 1) >> 1 without forming the full-width sum
+WRITE_RD(sext_xlen((rs1 >> 1) + (rs2 >> 1) + carry)); \ No newline at end of file
diff --git a/riscv/insns/bitrev.h b/riscv/insns/bitrev.h
new file mode 100644
index 0000000..939f5a4
--- /dev/null
+++ b/riscv/insns/bitrev.h
@@ -0,0 +1,12 @@
+require_extension('P'); // presumably rejects the instruction when the P extension is not enabled -- confirm
+reg_t msb = get_field(RS2, make_mask64(0, xlen == 32 ? 5 : 6)); // highest bit index to reverse, taken from rs2 (presumably its low 5 bits on RV32, 6 otherwise -- confirm make_mask64 argument order)
+reg_t n = get_field(RS1, make_mask64(0, msb + 1)); // presumably bits [msb:0] of rs1; NOTE(review): msb == 63 requests a 64-bit-wide mask -- confirm make_mask64 handles that length
+reg_t rev = 0;
+
+for (size_t i = 0; i <= msb; i++) { // msb + 1 iterations: move the bits of n, lowest first, into rev so their order is reversed
+ rev <<= 1;
+ rev |= n & 1;
+ n >>= 1;
+}
+
+WRITE_RD(sext_xlen(rev)); \ No newline at end of file
diff --git a/riscv/insns/bitrevi.h b/riscv/insns/bitrevi.h
new file mode 100644
index 0000000..5350e6b
--- /dev/null
+++ b/riscv/insns/bitrevi.h
@@ -0,0 +1,12 @@
+require_extension('P'); // presumably rejects the instruction when the P extension is not enabled -- confirm
+reg_t msb = xlen == 32 ? insn.p_imm5() : insn.p_imm6(); // highest bit index to reverse, taken from the instruction immediate (p_imm5 when xlen is 32, p_imm6 otherwise)
+reg_t n = get_field(RS1, make_mask64(0, msb + 1)); // presumably bits [msb:0] of rs1; NOTE(review): msb == 63 requests a 64-bit-wide mask -- confirm make_mask64 handles that length
+reg_t rev = 0;
+
+for (size_t i = 0; i <= msb; i++) { // msb + 1 iterations: move the bits of n, lowest first, into rev so their order is reversed
+ rev <<= 1;
+ rev |= n & 1;
+ n >>= 1;
+}
+
+WRITE_RD(sext_xlen(rev)); \ No newline at end of file
diff --git a/riscv/insns/bpick.h b/riscv/insns/bpick.h
new file mode 100644
index 0000000..fc83086
--- /dev/null
+++ b/riscv/insns/bpick.h
@@ -0,0 +1,6 @@
+require_extension('P'); // presumably rejects the instruction when the P extension is not enabled -- confirm
+reg_t rc = RS3; // third source register acts as the per-bit selector
+reg_t rs1 = RS1;
+reg_t rs2 = RS2; // result below: each bit comes from rs1 where rc is 1 and from rs2 where rc is 0
+
+WRITE_RD(sext_xlen((rs1 & rc) | (rs2 & ~rc))); \ No newline at end of file
diff --git a/riscv/insns/clo16.h b/riscv/insns/clo16.h
new file mode 100644
index 0000000..cc714ac
--- /dev/null
+++ b/riscv/insns/clo16.h
@@ -0,0 +1,11 @@
+P_ONE_LOOP(16, { // P_ONE_LOOP is defined outside this hunk; presumably runs the body per 16-bit element with ps1/pd bound -- confirm
+ pd = 0; // running count
+ ps1 = ~ps1; // invert so the leading ones to be counted become leading zeros
+ if (!ps1) pd = 16; // nothing left after inversion: report the full element width
+ else {
+ if ((ps1 & 0xFF00) == 0) { pd += 8; ps1 <<= 8; } // halving search from the top: skip 8, then 4, 2 and 1 zero bits
+ if ((ps1 & 0xF000) == 0) { pd += 4; ps1 <<= 4; }
+ if ((ps1 & 0xC000) == 0) { pd += 2; ps1 <<= 2; }
+ if ((ps1 & 0x8000) == 0) { pd += 1; }
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/clo32.h b/riscv/insns/clo32.h
new file mode 100644
index 0000000..2f81a25
--- /dev/null
+++ b/riscv/insns/clo32.h
@@ -0,0 +1,12 @@
+P_ONE_LOOP(32, { // P_ONE_LOOP is defined outside this hunk; presumably runs the body per 32-bit element with ps1/pd bound -- confirm
+ pd = 0; // running count
+ ps1 = ~ps1; // invert so the leading ones to be counted become leading zeros
+ if (!ps1) pd = 32; // nothing left after inversion: report the full element width
+ else {
+ if ((ps1 & 0xFFFF0000) == 0) { pd += 16; ps1 <<= 16; } // halving search from the top: skip 16, then 8, 4, 2 and 1 zero bits
+ if ((ps1 & 0xFF000000) == 0) { pd += 8; ps1 <<= 8; }
+ if ((ps1 & 0xF0000000) == 0) { pd += 4; ps1 <<= 4; }
+ if ((ps1 & 0xC0000000) == 0) { pd += 2; ps1 <<= 2; }
+ if ((ps1 & 0x80000000) == 0) { pd += 1; }
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/clo8.h b/riscv/insns/clo8.h
new file mode 100644
index 0000000..1009b39
--- /dev/null
+++ b/riscv/insns/clo8.h
@@ -0,0 +1,10 @@
+P_ONE_LOOP(8, { // P_ONE_LOOP is defined outside this hunk; presumably runs the body per 8-bit element with ps1/pd bound -- confirm
+ pd = 0; // running count
+ ps1 = ~ps1; // invert so the leading ones to be counted become leading zeros
+ if (!ps1) pd = 8; // nothing left after inversion: report the full element width
+ else {
+ if ((ps1 & 0xF0) == 0) { pd += 4; ps1 <<= 4; } // halving search from the top: skip 4, then 2 and 1 zero bits
+ if ((ps1 & 0xC0) == 0) { pd += 2; ps1 <<= 2; }
+ if ((ps1 & 0x80) == 0) { pd += 1; }
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/clrs16.h b/riscv/insns/clrs16.h
new file mode 100644
index 0000000..c9362a6
--- /dev/null
+++ b/riscv/insns/clrs16.h
@@ -0,0 +1,12 @@
+P_ONE_LOOP(16, { // P_ONE_LOOP is defined outside this hunk; presumably runs the body per 16-bit element with ps1/pd bound -- confirm
+ pd = 0; // running count
+ if (ps1 < 0) ps1 = ~ps1; // fold negative values so copies of the sign bit become leading zeros
+ if (!ps1) pd = 16; // value was 0 or -1: all 16 bits are zero after folding
+ else {
+ if ((ps1 & 0xFF00) == 0) { pd += 8; ps1 <<= 8; } // halving search from the top: skip 8, then 4, 2 and 1 zero bits
+ if ((ps1 & 0xF000) == 0) { pd += 4; ps1 <<= 4; }
+ if ((ps1 & 0xC000) == 0) { pd += 2; ps1 <<= 2; }
+ if ((ps1 & 0x8000) == 0) { pd += 1; }
+ }
+ pd -= 1; // one of the counted bits is the sign bit itself, so it is not redundant
+}) \ No newline at end of file
diff --git a/riscv/insns/clrs32.h b/riscv/insns/clrs32.h
new file mode 100644
index 0000000..55fc4f3
--- /dev/null
+++ b/riscv/insns/clrs32.h
@@ -0,0 +1,13 @@
+P_ONE_LOOP(32, { // P_ONE_LOOP is defined outside this hunk; presumably runs the body per 32-bit element with ps1/pd bound -- confirm
+ pd = 0; // running count
+ if (ps1 < 0) ps1 = ~ps1; // fold negative values so copies of the sign bit become leading zeros
+ if (!ps1) pd = 32; // value was 0 or -1: all 32 bits are zero after folding
+ else {
+ if ((ps1 & 0xFFFF0000) == 0) { pd += 16; ps1 <<= 16; } // halving search from the top: skip 16, then 8, 4, 2 and 1 zero bits
+ if ((ps1 & 0xFF000000) == 0) { pd += 8; ps1 <<= 8; }
+ if ((ps1 & 0xF0000000) == 0) { pd += 4; ps1 <<= 4; }
+ if ((ps1 & 0xC0000000) == 0) { pd += 2; ps1 <<= 2; }
+ if ((ps1 & 0x80000000) == 0) { pd += 1; }
+ }
+ pd -= 1; // one of the counted bits is the sign bit itself, so it is not redundant
+}) \ No newline at end of file
diff --git a/riscv/insns/clrs8.h b/riscv/insns/clrs8.h
new file mode 100644
index 0000000..071982c
--- /dev/null
+++ b/riscv/insns/clrs8.h
@@ -0,0 +1,11 @@
+P_ONE_LOOP(8, { // P_ONE_LOOP is defined outside this hunk; presumably runs the body per 8-bit element with ps1/pd bound -- confirm
+ pd = 0; // running count
+ if (ps1 < 0) ps1 = ~ps1; // fold negative values so copies of the sign bit become leading zeros
+ if (!ps1) pd = 8; // value was 0 or -1: all 8 bits are zero after folding
+ else {
+ if ((ps1 & 0xF0) == 0) { pd += 4; ps1 <<= 4; } // halving search from the top: skip 4, then 2 and 1 zero bits
+ if ((ps1 & 0xC0) == 0) { pd += 2; ps1 <<= 2; }
+ if ((ps1 & 0x80) == 0) { pd += 1; }
+ }
+ pd -= 1; // one of the counted bits is the sign bit itself, so it is not redundant
+}) \ No newline at end of file
diff --git a/riscv/insns/clz16.h b/riscv/insns/clz16.h
new file mode 100644
index 0000000..e4e5601
--- /dev/null
+++ b/riscv/insns/clz16.h
@@ -0,0 +1,10 @@
+P_ONE_LOOP(16, { // P_ONE_LOOP is defined outside this hunk; presumably runs the body per 16-bit element with ps1/pd bound -- confirm
+ pd = 0; // running count of leading zeros
+ if (ps1 == 0) pd = 16; // all-zero element: report the full width
+ else {
+ if ((ps1 & 0xFF00) == 0) { pd += 8; ps1 <<= 8; } // halving search from the top: skip 8, then 4, 2 and 1 zero bits
+ if ((ps1 & 0xF000) == 0) { pd += 4; ps1 <<= 4; }
+ if ((ps1 & 0xC000) == 0) { pd += 2; ps1 <<= 2; }
+ if ((ps1 & 0x8000) == 0) { pd += 1; }
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/clz32.h b/riscv/insns/clz32.h
new file mode 100644
index 0000000..0c36cd2
--- /dev/null
+++ b/riscv/insns/clz32.h
@@ -0,0 +1,11 @@
+P_ONE_LOOP(32, { // P_ONE_LOOP is defined outside this hunk; presumably runs the body per 32-bit element with ps1/pd bound -- confirm
+ pd = 0; // running count of leading zeros
+ if (ps1 == 0) pd = 32; // all-zero element: report the full width
+ else {
+ if ((ps1 & 0xFFFF0000) == 0) { pd += 16; ps1 <<= 16; } // halving search from the top: skip 16, then 8, 4, 2 and 1 zero bits
+ if ((ps1 & 0xFF000000) == 0) { pd += 8; ps1 <<= 8; }
+ if ((ps1 & 0xF0000000) == 0) { pd += 4; ps1 <<= 4; }
+ if ((ps1 & 0xC0000000) == 0) { pd += 2; ps1 <<= 2; }
+ if ((ps1 & 0x80000000) == 0) { pd += 1; }
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/clz8.h b/riscv/insns/clz8.h
new file mode 100644
index 0000000..4e56b24
--- /dev/null
+++ b/riscv/insns/clz8.h
@@ -0,0 +1,9 @@
+P_ONE_LOOP(8, { // P_ONE_LOOP is defined outside this hunk; presumably runs the body per 8-bit element with ps1/pd bound -- confirm
+ pd = 0; // running count of leading zeros
+ if (ps1 == 0) pd = 8; // all-zero element: report the full width
+ else {
+ if ((ps1 & 0xF0) == 0) { pd += 4; ps1 <<= 4; } // halving search from the top: skip 4, then 2 and 1 zero bits
+ if ((ps1 & 0xC0) == 0) { pd += 2; ps1 <<= 2; }
+ if ((ps1 & 0x80) == 0) { pd += 1; }
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/cmpeq16.h b/riscv/insns/cmpeq16.h
new file mode 100644
index 0000000..75c0c98
--- /dev/null
+++ b/riscv/insns/cmpeq16.h
@@ -0,0 +1,3 @@
+P_LOOP(16, { // P_LOOP is defined outside this hunk; presumably runs the body once per 16-bit element -- confirm
+ pd = (ps1 == ps2) ? -1 : 0; // -1 (all bits set) when the two elements are equal, otherwise 0
+}) \ No newline at end of file
diff --git a/riscv/insns/cmpeq8.h b/riscv/insns/cmpeq8.h
new file mode 100644
index 0000000..f282a55
--- /dev/null
+++ b/riscv/insns/cmpeq8.h
@@ -0,0 +1,3 @@
+P_LOOP(8, { // P_LOOP is defined outside this hunk; presumably runs the body once per 8-bit element -- confirm
+ pd = (ps1 == ps2) ? -1 : 0; // -1 (all bits set) when the two elements are equal, otherwise 0
+}) \ No newline at end of file
diff --git a/riscv/insns/cras16.h b/riscv/insns/cras16.h
new file mode 100644
index 0000000..4392edf
--- /dev/null
+++ b/riscv/insns/cras16.h
@@ -0,0 +1,5 @@
+P_CROSS_LOOP(16, { // P_CROSS_LOOP is defined outside this hunk and takes two bodies; presumably one per element of a crossed 16-bit pair -- confirm which element each body serves
+ pd = ps1 + ps2; // first body: add
+}, {
+ pd = ps1 - ps2; // second body: subtract
+}) \ No newline at end of file
diff --git a/riscv/insns/cras32.h b/riscv/insns/cras32.h
new file mode 100644
index 0000000..0604809
--- /dev/null
+++ b/riscv/insns/cras32.h
@@ -0,0 +1,6 @@
+require_rv64;
+P_CROSS_LOOP(32, { // P_CROSS_LOOP is defined outside this hunk and takes two bodies; presumably one per element of a crossed 32-bit pair -- confirm which element each body serves
+ pd = (int64_t)ps1 + ps2; // first body: add; widened like crsa32.h so a 32-bit element sum cannot overflow int before the store to pd
+}, {
+ pd = (int64_t)ps1 - ps2; // second body: subtract, widened for the same reason
+}) \ No newline at end of file
diff --git a/riscv/insns/crsa16.h b/riscv/insns/crsa16.h
new file mode 100644
index 0000000..7ae0ca8
--- /dev/null
+++ b/riscv/insns/crsa16.h
@@ -0,0 +1,5 @@
+P_CROSS_LOOP(16, { // P_CROSS_LOOP is defined outside this hunk and takes two bodies; presumably one per element of a crossed 16-bit pair -- confirm which element each body serves
+ pd = ps1 - ps2; // first body: subtract
+}, {
+ pd = ps1 + ps2; // second body: add
+}) \ No newline at end of file
diff --git a/riscv/insns/crsa32.h b/riscv/insns/crsa32.h
new file mode 100644
index 0000000..6d423a8
--- /dev/null
+++ b/riscv/insns/crsa32.h
@@ -0,0 +1,6 @@
+require_rv64; // presumably rejects the instruction unless running as RV64 -- confirm against the macro
+P_CROSS_LOOP(32, { // P_CROSS_LOOP is defined outside this hunk and takes two bodies; presumably one per element of a crossed 32-bit pair -- confirm
+ pd = (int64_t)ps1 - ps2; // first body: subtract; the operands are widened to 64 bits before the arithmetic
+}, {
+ pd = (int64_t)ps1 + ps2; // second body: add, widened the same way
+}) \ No newline at end of file
diff --git a/riscv/insns/insb.h b/riscv/insns/insb.h
new file mode 100644
index 0000000..199a8c6
--- /dev/null
+++ b/riscv/insns/insb.h
@@ -0,0 +1,3 @@
+require_extension('P'); // presumably rejects the instruction when the P extension is not enabled -- confirm
+reg_t bpos = (xlen == 32) ? insn.p_imm2() : insn.p_imm3(); // destination byte index from the immediate; used below as bpos * 8 with a width of 8 to replace one byte of rd with P_B(RS1, 0)
+WRITE_RD(sext_xlen(set_field(RD, make_mask64(bpos * 8, 8), P_B(RS1, 0)))); \ No newline at end of file
diff --git a/riscv/insns/kabs16.h b/riscv/insns/kabs16.h
new file mode 100644
index 0000000..f2b9472
--- /dev/null
+++ b/riscv/insns/kabs16.h
@@ -0,0 +1,9 @@
+P_ONE_LOOP(16, { // P_ONE_LOOP is defined outside this hunk; presumably runs the body per 16-bit element with ps1/pd bound -- confirm
+ pd = ps1; // non-negative inputs pass through unchanged
+ if (ps1 == INT16_MIN) { // the one value whose negation does not fit in 16 bits
+ pd = INT16_MAX; // clamp to the largest positive value instead
+ P_SET_OV(1); // presumably records the saturation in the overflow flag -- confirm
+ } else if (ps1 < 0) {
+ pd = - ps1;
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kabs32.h b/riscv/insns/kabs32.h
new file mode 100644
index 0000000..796d827
--- /dev/null
+++ b/riscv/insns/kabs32.h
@@ -0,0 +1,9 @@
+P_ONE_LOOP(32, { // P_ONE_LOOP is defined outside this hunk; presumably runs the body per 32-bit element with ps1/pd bound -- confirm
+ pd = ps1; // non-negative inputs pass through unchanged
+ if (ps1 == INT32_MIN) { // the one value whose negation does not fit in 32 bits
+ pd = INT32_MAX; // clamp to the largest positive value instead
+ P_SET_OV(1); // presumably records the saturation in the overflow flag -- confirm
+ } else if (ps1 < 0) {
+ pd = - ps1;
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kabs8.h b/riscv/insns/kabs8.h
new file mode 100644
index 0000000..51ee04d
--- /dev/null
+++ b/riscv/insns/kabs8.h
@@ -0,0 +1,9 @@
+P_ONE_LOOP(8, { // P_ONE_LOOP is defined outside this hunk; presumably runs the body per 8-bit element with ps1/pd bound -- confirm
+ pd = ps1; // non-negative inputs pass through unchanged
+ if (ps1 == INT8_MIN) { // the one value whose negation does not fit in 8 bits
+ pd = INT8_MAX; // clamp to the largest positive value instead
+ P_SET_OV(1); // presumably records the saturation in the overflow flag -- confirm
+ } else if (ps1 < 0) {
+ pd = - ps1;
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kabsw.h b/riscv/insns/kabsw.h
new file mode 100644
index 0000000..677d51e
--- /dev/null
+++ b/riscv/insns/kabsw.h
@@ -0,0 +1,9 @@
+require_extension('P'); // presumably rejects the instruction when the P extension is not enabled -- confirm
+int32_t rs1 = P_W(RS1, 0); // P_W(x, 0): presumably word 0 (the low 32 bits) of rs1 -- confirm
+
+if (rs1 == INT32_MIN) { // the one value whose negation does not fit in 32 bits
+ rs1 = INT32_MAX; // clamp first, so the negation in the final expression is never applied to INT32_MIN
+ P_SET_OV(1); // presumably records the saturation in the overflow flag -- confirm
+}
+
+WRITE_RD(sext_xlen(rs1 >= 0 ? rs1 : -rs1)); \ No newline at end of file
diff --git a/riscv/insns/kadd16.h b/riscv/insns/kadd16.h
new file mode 100644
index 0000000..b557e9a
--- /dev/null
+++ b/riscv/insns/kadd16.h
@@ -0,0 +1,5 @@
+P_LOOP(16, { // P_LOOP is defined outside this hunk; presumably runs the body once per 16-bit element -- confirm
+ bool sat = false; // passed to sat_add below, then forwarded to P_SET_OV
+ pd = (sat_add<int16_t, uint16_t>(ps1, ps2, sat)); // sat_add is defined elsewhere; presumably a signed saturating add that reports clamping through sat -- confirm
+ P_SET_OV(sat); // presumably records saturation in the overflow flag -- confirm
+}) \ No newline at end of file
diff --git a/riscv/insns/kadd32.h b/riscv/insns/kadd32.h
new file mode 100644
index 0000000..462e106
--- /dev/null
+++ b/riscv/insns/kadd32.h
@@ -0,0 +1,6 @@
+require_rv64; // presumably rejects the instruction unless running as RV64 -- confirm against the macro
+P_LOOP(32, { // P_LOOP is defined outside this hunk; presumably runs the body once per 32-bit element -- confirm
+ bool sat = false; // passed to sat_add below, then forwarded to P_SET_OV
+ pd = (sat_add<int32_t, uint32_t>(ps1, ps2, sat)); // sat_add is defined elsewhere; presumably a signed saturating add that reports clamping through sat -- confirm
+ P_SET_OV(sat); // presumably records saturation in the overflow flag -- confirm
+}) \ No newline at end of file
diff --git a/riscv/insns/kadd64.h b/riscv/insns/kadd64.h
new file mode 100644
index 0000000..e99b960
--- /dev/null
+++ b/riscv/insns/kadd64.h
@@ -0,0 +1,5 @@
+P_64_PROFILE({ // P_64_PROFILE is defined outside this hunk; presumably binds rs1/rs2/rd as 64-bit operands and writes rd back -- confirm
+ bool sat = false; // passed to sat_add below, then forwarded to P_SET_OV
+ rd = (sat_add<int64_t, uint64_t>(rs1, rs2, sat)); // sat_add is defined elsewhere; presumably a signed saturating add that reports clamping through sat -- confirm
+ P_SET_OV(sat); // presumably records saturation in the overflow flag -- confirm
+}) \ No newline at end of file
diff --git a/riscv/insns/kadd8.h b/riscv/insns/kadd8.h
new file mode 100644
index 0000000..e004a02
--- /dev/null
+++ b/riscv/insns/kadd8.h
@@ -0,0 +1,5 @@
+P_LOOP(8, { // P_LOOP is defined outside this hunk; presumably runs the body once per 8-bit element -- confirm
+ bool sat = false; // passed to sat_add below, then forwarded to P_SET_OV
+ pd = (sat_add<int8_t, uint8_t>(ps1, ps2, sat)); // sat_add is defined elsewhere; presumably a signed saturating add that reports clamping through sat -- confirm
+ P_SET_OV(sat); // presumably records saturation in the overflow flag -- confirm
+}) \ No newline at end of file
diff --git a/riscv/insns/kaddh.h b/riscv/insns/kaddh.h
new file mode 100644
index 0000000..e81219b
--- /dev/null
+++ b/riscv/insns/kaddh.h
@@ -0,0 +1,4 @@
+require_extension('P'); // presumably rejects the instruction when the P extension is not enabled -- confirm
+sreg_t res = (sreg_t)P_SW(RS1, 0) + (sreg_t)P_SW(RS2, 0); // P_SW(x, 0): presumably signed word 0 of each source -- confirm; both are cast to sreg_t before the add
+P_SAT(res, 16); // P_SAT is defined elsewhere; presumably clamps res to the signed 16-bit range and flags overflow -- confirm; the result is then written as a sign-extended int16_t
+WRITE_RD(sext_xlen((int16_t)res)); \ No newline at end of file
diff --git a/riscv/insns/kaddw.h b/riscv/insns/kaddw.h
new file mode 100644
index 0000000..9b84c09
--- /dev/null
+++ b/riscv/insns/kaddw.h
@@ -0,0 +1,4 @@
+require_extension('P'); // presumably rejects the instruction when the P extension is not enabled -- confirm
+sreg_t res = (sreg_t)P_SW(RS1, 0) + (sreg_t)P_SW(RS2, 0); // P_SW(x, 0): presumably signed word 0 of each source -- confirm; both are cast to sreg_t before the add
+P_SAT(res, 32); // P_SAT is defined elsewhere; presumably clamps res to the signed 32-bit range and flags overflow -- confirm; the result is then written through sext32
+WRITE_RD(sext32(res)); \ No newline at end of file
diff --git a/riscv/insns/kcras16.h b/riscv/insns/kcras16.h
new file mode 100644
index 0000000..81bba89
--- /dev/null
+++ b/riscv/insns/kcras16.h
@@ -0,0 +1,9 @@
+P_CROSS_ULOOP(16, { // P_CROSS_ULOOP is defined outside this hunk and takes two bodies; presumably one per element of a crossed 16-bit pair -- confirm which element each body serves
+ bool sat = false;
+ pd = (sat_add<int16_t, uint16_t>(ps1, ps2, sat)); // first body: add via sat_add (defined elsewhere; presumably saturating, reporting through sat -- confirm)
+ P_SET_OV(sat); // presumably records saturation in the overflow flag -- confirm
+}, {
+ bool sat = false;
+ pd = (sat_sub<int16_t, uint16_t>(ps1, ps2, sat)); // second body: subtract via sat_sub (defined elsewhere; presumably saturating -- confirm)
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kcras32.h b/riscv/insns/kcras32.h
new file mode 100644
index 0000000..92e8245
--- /dev/null
+++ b/riscv/insns/kcras32.h
@@ -0,0 +1,10 @@
+require_rv64; // presumably rejects the instruction unless running as RV64 -- confirm against the macro
+P_CROSS_ULOOP(32, { // P_CROSS_ULOOP is defined outside this hunk and takes two bodies; presumably one per element of a crossed 32-bit pair -- confirm
+ bool sat = false;
+ pd = (sat_add<int32_t, uint32_t>(ps1, ps2, sat)); // first body: add via sat_add (defined elsewhere; presumably saturating, reporting through sat -- confirm)
+ P_SET_OV(sat); // presumably records saturation in the overflow flag -- confirm
+}, {
+ bool sat = false;
+ pd = (sat_sub<int32_t, uint32_t>(ps1, ps2, sat)); // second body: subtract via sat_sub (defined elsewhere; presumably saturating -- confirm)
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kcrsa16.h b/riscv/insns/kcrsa16.h
new file mode 100644
index 0000000..32c80d2
--- /dev/null
+++ b/riscv/insns/kcrsa16.h
@@ -0,0 +1,9 @@
+P_CROSS_ULOOP(16, { // P_CROSS_ULOOP is defined outside this hunk and takes two bodies; presumably one per element of a crossed 16-bit pair -- confirm which element each body serves
+ bool sat = false;
+ pd = (sat_sub<int16_t, uint16_t>(ps1, ps2, sat)); // first body: subtract via sat_sub (defined elsewhere; presumably saturating, reporting through sat -- confirm)
+ P_SET_OV(sat); // presumably records saturation in the overflow flag -- confirm
+}, {
+ bool sat = false;
+ pd = (sat_add<int16_t, uint16_t>(ps1, ps2, sat)); // second body: add via sat_add (defined elsewhere; presumably saturating -- confirm)
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kcrsa32.h b/riscv/insns/kcrsa32.h
new file mode 100644
index 0000000..a23c56a
--- /dev/null
+++ b/riscv/insns/kcrsa32.h
@@ -0,0 +1,10 @@
+require_rv64; // presumably rejects the instruction unless running as RV64 -- confirm against the macro
+P_CROSS_ULOOP(32, { // P_CROSS_ULOOP is defined outside this hunk and takes two bodies; presumably one per element of a crossed 32-bit pair -- confirm
+ bool sat = false;
+ pd = (sat_sub<int32_t, uint32_t>(ps1, ps2, sat)); // first body: subtract via sat_sub (defined elsewhere; presumably saturating, reporting through sat -- confirm)
+ P_SET_OV(sat); // presumably records saturation in the overflow flag -- confirm
+}, {
+ bool sat = false;
+ pd = (sat_add<int32_t, uint32_t>(ps1, ps2, sat)); // second body: add via sat_add (defined elsewhere; presumably saturating -- confirm)
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kdmabb.h b/riscv/insns/kdmabb.h
new file mode 100644
index 0000000..920cfdb
--- /dev/null
+++ b/riscv/insns/kdmabb.h
@@ -0,0 +1,16 @@
+require_extension('P'); // presumably rejects the instruction when the P extension is not enabled -- confirm
+sreg_t res;
+sreg_t aop = P_SH(RS1, 0); // P_SH(x, 0): presumably signed halfword 0 of rs1 -- confirm
+sreg_t bop = P_SH(RS2, 0); // halfword 0 of rs2
+
+if ((INT16_MIN != aop) | (INT16_MIN != bop)) { // bitwise | of two comparisons: false only when both operands are INT16_MIN
+ res = aop * bop;
+ res <<= 1; // doubled product
+} else {
+ res = INT32_MAX; // both operands are INT16_MIN: use INT32_MAX in place of the doubled product
+ P_SET_OV(1); // presumably records the saturation in the overflow flag -- confirm
+}
+
+res += sext32(RD); // accumulate onto the current destination value, passed through sext32
+P_SAT(res, 32); // P_SAT is defined elsewhere; presumably clamps res to the signed 32-bit range and flags overflow -- confirm
+WRITE_RD(sext32(res)); \ No newline at end of file
diff --git a/riscv/insns/kdmabb16.h b/riscv/insns/kdmabb16.h
new file mode 100644
index 0000000..eed5efa
--- /dev/null
+++ b/riscv/insns/kdmabb16.h
@@ -0,0 +1,17 @@
+require_rv64; // presumably rejects the instruction unless running as RV64 -- confirm against the macro
+P_LOOP(32, { // P_LOOP is defined outside this hunk; presumably runs the body once per 32-bit element with ps1/ps2/pd bound -- confirm
+ int32_t aop = P_SH(ps1, 0); // P_SH(x, 0): presumably signed halfword 0 of the element -- confirm
+ int32_t bop = P_SH(ps2, 0);
+ int32_t mres;
+ bool sat = false; // initialised like the sat flags in kadd32.h/kcras32.h so P_SET_OV never reads an indeterminate value
+
+ if ((INT16_MIN != aop) | (INT16_MIN != bop)) { // bitwise | of two comparisons: false only when both operands are INT16_MIN
+ mres = aop * bop;
+ mres <<= 1; // doubled product
+ } else {
+ mres = INT32_MAX; // both operands are INT16_MIN: use INT32_MAX in place of the doubled product
+ P_SET_OV(1); // presumably records the saturation in the overflow flag -- confirm
+ }
+ pd = (sat_add<int32_t, uint32_t>(pd, mres, sat)); // accumulate onto the incoming pd via sat_add (defined elsewhere; presumably saturating -- confirm)
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kdmabt.h b/riscv/insns/kdmabt.h
new file mode 100644
index 0000000..5281900
--- /dev/null
+++ b/riscv/insns/kdmabt.h
@@ -0,0 +1,16 @@
+require_extension('P'); // presumably rejects the instruction when the P extension is not enabled -- confirm
+sreg_t res;
+sreg_t aop = P_SH(RS1, 0); // P_SH(x, n): presumably signed halfword n of the register -- confirm; halfword 0 of rs1
+sreg_t bop = P_SH(RS2, 1); // halfword 1 of rs2
+
+if ((INT16_MIN != aop) | (INT16_MIN != bop)) { // bitwise | of two comparisons: false only when both operands are INT16_MIN
+ res = aop * bop;
+ res <<= 1; // doubled product
+} else {
+ res = INT32_MAX; // both operands are INT16_MIN: use INT32_MAX in place of the doubled product
+ P_SET_OV(1); // presumably records the saturation in the overflow flag -- confirm
+}
+
+res += sext32(RD); // accumulate onto the current destination value, passed through sext32
+P_SAT(res, 32); // P_SAT is defined elsewhere; presumably clamps res to the signed 32-bit range and flags overflow -- confirm
+WRITE_RD(sext32(res)); \ No newline at end of file
diff --git a/riscv/insns/kdmabt16.h b/riscv/insns/kdmabt16.h
new file mode 100644
index 0000000..8190186
--- /dev/null
+++ b/riscv/insns/kdmabt16.h
@@ -0,0 +1,17 @@
+require_rv64; // presumably rejects the instruction unless running as RV64 -- confirm against the macro
+P_LOOP(32, { // P_LOOP is defined outside this hunk; presumably runs the body once per 32-bit element with ps1/ps2/pd bound -- confirm
+ int32_t aop = P_SH(ps1, 0); // P_SH(x, n): presumably signed halfword n of the element -- confirm; halfword 0 of ps1
+ int32_t bop = P_SH(ps2, 1); // halfword 1 of ps2
+ int32_t mres;
+ bool sat = false; // initialised like the sat flags in kadd32.h/kcras32.h so P_SET_OV never reads an indeterminate value
+
+ if ((INT16_MIN != aop) | (INT16_MIN != bop)) { // bitwise | of two comparisons: false only when both operands are INT16_MIN
+ mres = aop * bop;
+ mres <<= 1; // doubled product
+ } else {
+ mres = INT32_MAX; // both operands are INT16_MIN: use INT32_MAX in place of the doubled product
+ P_SET_OV(1); // presumably records the saturation in the overflow flag -- confirm
+ }
+ pd = (sat_add<int32_t, uint32_t>(pd, mres, sat)); // accumulate onto the incoming pd via sat_add (defined elsewhere; presumably saturating -- confirm)
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kdmatt.h b/riscv/insns/kdmatt.h
new file mode 100644
index 0000000..2885e3a
--- /dev/null
+++ b/riscv/insns/kdmatt.h
@@ -0,0 +1,16 @@
+require_extension('P');
+sreg_t res;
+sreg_t aop = P_SH(RS1, 1);
+sreg_t bop = P_SH(RS2, 1);
+
+if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ res = aop * bop;
+ res <<= 1;
+} else {
+ res = INT32_MAX;
+ P_SET_OV(1);
+}
+
+res += sext32(RD);
+P_SAT(res, 32);
+WRITE_RD(sext32(res)); \ No newline at end of file
diff --git a/riscv/insns/kdmatt16.h b/riscv/insns/kdmatt16.h
new file mode 100644
index 0000000..22e8a00
--- /dev/null
+++ b/riscv/insns/kdmatt16.h
@@ -0,0 +1,17 @@
+// kdmatt16 (RV64 only): for each 32-bit element, doubles the product of
+// P_SH(ps1, 1) and P_SH(ps2, 1) and accumulates it into pd through sat_add.
+require_rv64;
+P_LOOP(32, {
+ int32_t aop = P_SH(ps1, 1);
+ int32_t bop = P_SH(ps2, 1);
+ int32_t mres;
+ // Initialised like the other sat_add call sites so the flag can never be
+ // read while indeterminate.
+ bool sat = false;
+
+ // Only INT16_MIN * INT16_MIN takes the else branch: the doubled product is
+ // replaced by INT32_MAX and the overflow flag is raised.
+ if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ mres = aop * bop;
+ mres <<= 1;
+ } else {
+ mres = INT32_MAX;
+ P_SET_OV(1);
+ }
+ pd = (sat_add<int32_t, uint32_t>(pd, mres, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kdmbb.h b/riscv/insns/kdmbb.h
new file mode 100644
index 0000000..af25355
--- /dev/null
+++ b/riscv/insns/kdmbb.h
@@ -0,0 +1,12 @@
+require_extension('P');
+sreg_t res;
+sreg_t aop = P_SH(RS1, 0);
+sreg_t bop = P_SH(RS2, 0);
+if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ res = aop * bop;
+ res <<= 1;
+} else {
+ res = INT32_MAX;
+ P_SET_OV(1);
+}
+WRITE_RD(sext32(res)); \ No newline at end of file
diff --git a/riscv/insns/kdmbb16.h b/riscv/insns/kdmbb16.h
new file mode 100644
index 0000000..1353c00
--- /dev/null
+++ b/riscv/insns/kdmbb16.h
@@ -0,0 +1,12 @@
+require_rv64;
+P_LOOP(32, {
+ int32_t aop = P_SH(ps1, 0);
+ int32_t bop = P_SH(ps2, 0);
+ if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ pd = aop * bop;
+ pd <<= 1;
+ } else {
+ pd = INT32_MAX;
+ P_SET_OV(1);
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kdmbt.h b/riscv/insns/kdmbt.h
new file mode 100644
index 0000000..32017ed
--- /dev/null
+++ b/riscv/insns/kdmbt.h
@@ -0,0 +1,12 @@
+require_extension('P');
+sreg_t res;
+sreg_t aop = P_SH(RS1, 0);
+sreg_t bop = P_SH(RS2, 1);
+if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ res = aop * bop;
+ res <<= 1;
+} else {
+ res = INT32_MAX;
+ P_SET_OV(1);
+}
+WRITE_RD(sext32(res)); \ No newline at end of file
diff --git a/riscv/insns/kdmbt16.h b/riscv/insns/kdmbt16.h
new file mode 100644
index 0000000..d934330
--- /dev/null
+++ b/riscv/insns/kdmbt16.h
@@ -0,0 +1,12 @@
+require_rv64;
+P_LOOP(32, {
+ int32_t aop = P_SH(ps1, 0);
+ int32_t bop = P_SH(ps2, 1);
+ if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ pd = aop * bop;
+ pd <<= 1;
+ } else {
+ pd = INT32_MAX;
+ P_SET_OV(1);
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kdmtt.h b/riscv/insns/kdmtt.h
new file mode 100644
index 0000000..3851280
--- /dev/null
+++ b/riscv/insns/kdmtt.h
@@ -0,0 +1,12 @@
+require_extension('P');
+sreg_t res;
+sreg_t aop = P_SH(RS1, 1);
+sreg_t bop = P_SH(RS2, 1);
+if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ res = aop * bop;
+ res <<= 1;
+} else {
+ res = INT32_MAX;
+ P_SET_OV(1);
+}
+WRITE_RD(sext32(res)); \ No newline at end of file
diff --git a/riscv/insns/kdmtt16.h b/riscv/insns/kdmtt16.h
new file mode 100644
index 0000000..0aed777
--- /dev/null
+++ b/riscv/insns/kdmtt16.h
@@ -0,0 +1,12 @@
+require_rv64;
+P_LOOP(32, {
+ int32_t aop = P_SH(ps1, 1);
+ int32_t bop = P_SH(ps2, 1);
+ if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ pd = aop * bop;
+ pd <<= 1;
+ } else {
+ pd = INT32_MAX;
+ P_SET_OV(1);
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/khm16.h b/riscv/insns/khm16.h
new file mode 100644
index 0000000..4414345
--- /dev/null
+++ b/riscv/insns/khm16.h
@@ -0,0 +1,8 @@
+P_LOOP(16, {
+ if ((ps1 != INT16_MIN) | (ps2 != INT16_MIN)) {
+ pd = (ps1 * ps2) >> 15;
+ } else {
+ pd = INT16_MAX;
+ P_SET_OV(1);
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/khm8.h b/riscv/insns/khm8.h
new file mode 100644
index 0000000..aeef15c
--- /dev/null
+++ b/riscv/insns/khm8.h
@@ -0,0 +1,8 @@
+P_LOOP(8, {
+ if ((ps1 != INT8_MIN) | (ps2 != INT8_MIN)) {
+ pd = (ps1 * ps2) >> 7;
+ } else {
+ pd = INT8_MAX;
+ P_SET_OV(1);
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/khmbb.h b/riscv/insns/khmbb.h
new file mode 100644
index 0000000..6a59aa7
--- /dev/null
+++ b/riscv/insns/khmbb.h
@@ -0,0 +1,12 @@
+require_extension('P');
+sreg_t res;
+sreg_t aop = P_SH(RS1, 0);
+sreg_t bop = P_SH(RS2, 0);
+if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ res = aop * bop;
+ res >>= 15;
+} else {
+ res = INT16_MAX;
+ P_SET_OV(1);
+}
+WRITE_RD(sext_xlen((int16_t)res)); \ No newline at end of file
diff --git a/riscv/insns/khmbb16.h b/riscv/insns/khmbb16.h
new file mode 100644
index 0000000..42a1d1b
--- /dev/null
+++ b/riscv/insns/khmbb16.h
@@ -0,0 +1,13 @@
+require_rv64;
+P_LOOP(32, {
+ int32_t aop = P_SH(ps1, 0);
+ int32_t bop = P_SH(ps2, 0);
+ if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ pd = aop * bop;
+ pd >>= 15;
+ } else {
+ pd = INT16_MAX;
+ P_SET_OV(1);
+ }
+ pd = (int16_t)pd;
+}) \ No newline at end of file
diff --git a/riscv/insns/khmbt.h b/riscv/insns/khmbt.h
new file mode 100644
index 0000000..cdbd00f
--- /dev/null
+++ b/riscv/insns/khmbt.h
@@ -0,0 +1,12 @@
+require_extension('P');
+sreg_t res;
+sreg_t aop = P_SH(RS1, 0);
+sreg_t bop = P_SH(RS2, 1);
+if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ res = aop * bop;
+ res >>= 15;
+} else {
+ res = INT16_MAX;
+ P_SET_OV(1);
+}
+WRITE_RD(sext_xlen((int16_t)res)); \ No newline at end of file
diff --git a/riscv/insns/khmbt16.h b/riscv/insns/khmbt16.h
new file mode 100644
index 0000000..f187902
--- /dev/null
+++ b/riscv/insns/khmbt16.h
@@ -0,0 +1,13 @@
+require_rv64;
+P_LOOP(32, {
+ int32_t aop = P_SH(ps1, 0);
+ int32_t bop = P_SH(ps2, 1);
+ if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ pd = aop * bop;
+ pd >>= 15;
+ } else {
+ pd = INT16_MAX;
+ P_SET_OV(1);
+ }
+ pd = (int16_t)pd;
+}) \ No newline at end of file
diff --git a/riscv/insns/khmtt.h b/riscv/insns/khmtt.h
new file mode 100644
index 0000000..efcd479
--- /dev/null
+++ b/riscv/insns/khmtt.h
@@ -0,0 +1,12 @@
+require_extension('P');
+sreg_t res;
+sreg_t aop = P_SH(RS1, 1);
+sreg_t bop = P_SH(RS2, 1);
+if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ res = aop * bop;
+ res >>= 15;
+} else {
+ res = INT16_MAX;
+ P_SET_OV(1);
+}
+WRITE_RD(sext_xlen((int16_t)res)); \ No newline at end of file
diff --git a/riscv/insns/khmtt16.h b/riscv/insns/khmtt16.h
new file mode 100644
index 0000000..7b70220
--- /dev/null
+++ b/riscv/insns/khmtt16.h
@@ -0,0 +1,13 @@
+require_rv64;
+P_LOOP(32, {
+ int32_t aop = P_SH(ps1, 1);
+ int32_t bop = P_SH(ps2, 1);
+ if ((INT16_MIN != aop) | (INT16_MIN != bop)) {
+ pd = aop * bop;
+ pd >>= 15;
+ } else {
+ pd = INT16_MAX;
+ P_SET_OV(1);
+ }
+ pd = (int16_t)pd;
+}) \ No newline at end of file
diff --git a/riscv/insns/khmx16.h b/riscv/insns/khmx16.h
new file mode 100644
index 0000000..dd5ddf1
--- /dev/null
+++ b/riscv/insns/khmx16.h
@@ -0,0 +1,8 @@
+P_CROSS_LOOP(16, {
+ if ((ps1 != INT16_MIN) | (ps2 != INT16_MIN)) {
+ pd = (ps1 * ps2) >> 15;
+ } else {
+ pd = INT16_MAX;
+ P_SET_OV(1);
+ }
+},) \ No newline at end of file
diff --git a/riscv/insns/khmx8.h b/riscv/insns/khmx8.h
new file mode 100644
index 0000000..41770e8
--- /dev/null
+++ b/riscv/insns/khmx8.h
@@ -0,0 +1,8 @@
+P_CROSS_LOOP(8, {
+ if ((ps1 != INT8_MIN) | (ps2 != INT8_MIN)) {
+ pd = (ps1 * ps2) >> 7;
+ } else {
+ pd = INT8_MAX;
+ P_SET_OV(1);
+ }
+},) \ No newline at end of file
diff --git a/riscv/insns/kmabb.h b/riscv/insns/kmabb.h
new file mode 100644
index 0000000..18b01fb
--- /dev/null
+++ b/riscv/insns/kmabb.h
@@ -0,0 +1,6 @@
+P_LOOP(32, {
+ int32_t mres = P_SH(ps1, 0) * P_SH(ps2, 0);
+ bool sat = false;
+ pd = (sat_add<int32_t, uint32_t>(pd, mres, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmabb32.h b/riscv/insns/kmabb32.h
new file mode 100644
index 0000000..0c417af
--- /dev/null
+++ b/riscv/insns/kmabb32.h
@@ -0,0 +1,7 @@
+require_rv64;
+require_extension('P');
+
+bool sat = false;
+sreg_t mres = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 0);
+WRITE_RD((sat_add<int64_t, uint64_t>(RD, mres, sat)));
+P_SET_OV(sat); \ No newline at end of file
diff --git a/riscv/insns/kmabt.h b/riscv/insns/kmabt.h
new file mode 100644
index 0000000..eeab423
--- /dev/null
+++ b/riscv/insns/kmabt.h
@@ -0,0 +1,6 @@
+P_LOOP(32, {
+ int32_t mres = P_SH(ps1, 0) * P_SH(ps2, 1);
+ bool sat = false;
+ pd = (sat_add<int32_t, uint32_t>(pd, mres, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmabt32.h b/riscv/insns/kmabt32.h
new file mode 100644
index 0000000..42701f4
--- /dev/null
+++ b/riscv/insns/kmabt32.h
@@ -0,0 +1,7 @@
+require_rv64;
+require_extension('P');
+
+bool sat = false;
+sreg_t mres = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 1);
+WRITE_RD((sat_add<int64_t, uint64_t>(RD, mres, sat)));
+P_SET_OV(sat); \ No newline at end of file
diff --git a/riscv/insns/kmada.h b/riscv/insns/kmada.h
new file mode 100644
index 0000000..2fb8fd4
--- /dev/null
+++ b/riscv/insns/kmada.h
@@ -0,0 +1,3 @@
+P_REDUCTION_LOOP(32, 16, true, true, {
+ pd_res += ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/kmadrs.h b/riscv/insns/kmadrs.h
new file mode 100644
index 0000000..8b5a888
--- /dev/null
+++ b/riscv/insns/kmadrs.h
@@ -0,0 +1,6 @@
+P_REDUCTION_LOOP(32, 16, true, true, {
+ if (j & 1)
+ pd_res -= ps1 * ps2;
+ else
+ pd_res += ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/kmadrs32.h b/riscv/insns/kmadrs32.h
new file mode 100644
index 0000000..f502b0e
--- /dev/null
+++ b/riscv/insns/kmadrs32.h
@@ -0,0 +1,9 @@
+// kmadrs32 (RV64 only): RD + P_SW(RS1,0)*P_SW(RS2,0) - P_SW(RS1,1)*P_SW(RS2,1),
+// combined by sat_add; the flag it reports is forwarded to P_SET_OV.
+require_rv64;
+require_extension('P');
+
+// Initialised to match the kmabb32/kmabt32/kmatt32 call sites.
+bool sat = false;
+sreg_t mres0 = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 0);
+sreg_t mres1 = (sreg_t)P_SW(RS1, 1) * P_SW(RS2, 1);
+
+WRITE_RD((sat_add<sreg_t, reg_t>(RD, mres0, -mres1, sat)));
+P_SET_OV(sat); \ No newline at end of file
diff --git a/riscv/insns/kmads.h b/riscv/insns/kmads.h
new file mode 100644
index 0000000..f1d9948
--- /dev/null
+++ b/riscv/insns/kmads.h
@@ -0,0 +1,6 @@
+P_REDUCTION_LOOP(32, 16, true, true, {
+ if (j & 1)
+ pd_res += ps1 * ps2;
+ else
+ pd_res -= ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/kmads32.h b/riscv/insns/kmads32.h
new file mode 100644
index 0000000..79ad4a6
--- /dev/null
+++ b/riscv/insns/kmads32.h
@@ -0,0 +1,9 @@
+// kmads32 (RV64 only): RD - P_SW(RS1,0)*P_SW(RS2,0) + P_SW(RS1,1)*P_SW(RS2,1),
+// combined by sat_add; the flag it reports is forwarded to P_SET_OV.
+require_rv64;
+require_extension('P');
+
+// Initialised to match the kmabb32/kmabt32/kmatt32 call sites.
+bool sat = false;
+sreg_t mres0 = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 0);
+sreg_t mres1 = (sreg_t)P_SW(RS1, 1) * P_SW(RS2, 1);
+
+WRITE_RD((sat_add<sreg_t, reg_t>(RD, -mres0, mres1, sat)));
+P_SET_OV(sat); \ No newline at end of file
diff --git a/riscv/insns/kmar64.h b/riscv/insns/kmar64.h
new file mode 100644
index 0000000..eb6275c
--- /dev/null
+++ b/riscv/insns/kmar64.h
@@ -0,0 +1,15 @@
+// kmar64: adds signed word products to the rd value prepared by the
+// P_64_PROFILE_* macros using sat_add. When xlen == 32 only the
+// P_SW(.., 0) product is accumulated; otherwise both word products are.
+P_64_PROFILE_BASE()
+P_64_PROFILE_PARAM(true, false)
+
+bool sat = false;
+sreg_t mres0 = (sreg_t)P_SW(rs1, 0) * P_SW(rs2, 0);
+sreg_t mres1 = (sreg_t)P_SW(rs1, 1) * P_SW(rs2, 1);
+
+if (xlen == 32) {
+ rd = (sat_add<int64_t, uint64_t>(rd, mres0, sat));
+} else {
+ rd = (sat_add<int64_t, uint64_t>(rd, mres0, mres1, sat));
+}
+// Forward the saturation flag reported by sat_add to the overflow state.
+P_SET_OV(sat);
+P_64_PROFILE_END() \ No newline at end of file
diff --git a/riscv/insns/kmatt.h b/riscv/insns/kmatt.h
new file mode 100644
index 0000000..e2611c6
--- /dev/null
+++ b/riscv/insns/kmatt.h
@@ -0,0 +1,6 @@
+P_LOOP(32, {
+ int32_t mres = P_SH(ps1, 1) * P_SH(ps2, 1);
+ bool sat = false;
+ pd = (sat_add<int32_t, uint32_t>(pd, mres, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmatt32.h b/riscv/insns/kmatt32.h
new file mode 100644
index 0000000..e0e1a90
--- /dev/null
+++ b/riscv/insns/kmatt32.h
@@ -0,0 +1,7 @@
+require_rv64;
+require_extension('P');
+
+bool sat = false;
+sreg_t mres = (sreg_t)P_SW(RS1, 1) * P_SW(RS2, 1);
+WRITE_RD((sat_add<int64_t, uint64_t>(RD, mres, sat)));
+P_SET_OV(sat); \ No newline at end of file
diff --git a/riscv/insns/kmaxda.h b/riscv/insns/kmaxda.h
new file mode 100644
index 0000000..7aaca96
--- /dev/null
+++ b/riscv/insns/kmaxda.h
@@ -0,0 +1,3 @@
+P_REDUCTION_CROSS_LOOP(32, 16, true, true, {
+ pd_res += ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/kmaxda32.h b/riscv/insns/kmaxda32.h
new file mode 100644
index 0000000..0fb0c70
--- /dev/null
+++ b/riscv/insns/kmaxda32.h
@@ -0,0 +1,9 @@
+// kmaxda32 (RV64 only): RD + P_SW(RS1,0)*P_SW(RS2,1) + P_SW(RS1,1)*P_SW(RS2,0),
+// combined by sat_add; the flag it reports is forwarded to P_SET_OV.
+require_rv64;
+require_extension('P');
+
+// Initialised to match the kmabb32/kmabt32/kmatt32 call sites.
+bool sat = false;
+sreg_t mres0 = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 1);
+sreg_t mres1 = (sreg_t)P_SW(RS1, 1) * P_SW(RS2, 0);
+
+WRITE_RD((sat_add<sreg_t, reg_t>(RD, mres0, mres1, sat)));
+P_SET_OV(sat); \ No newline at end of file
diff --git a/riscv/insns/kmaxds.h b/riscv/insns/kmaxds.h
new file mode 100644
index 0000000..d6f36f8
--- /dev/null
+++ b/riscv/insns/kmaxds.h
@@ -0,0 +1,6 @@
+P_REDUCTION_CROSS_LOOP(32, 16, true, true, {
+ if (j & 1)
+ pd_res += ps1 * ps2;
+ else
+ pd_res -= ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/kmaxds32.h b/riscv/insns/kmaxds32.h
new file mode 100644
index 0000000..1fd93d3
--- /dev/null
+++ b/riscv/insns/kmaxds32.h
@@ -0,0 +1,9 @@
+// kmaxds32 (RV64 only): RD - P_SW(RS1,0)*P_SW(RS2,1) + P_SW(RS1,1)*P_SW(RS2,0),
+// combined by sat_add; the flag it reports is forwarded to P_SET_OV.
+require_rv64;
+require_extension('P');
+
+// Initialised to match the kmabb32/kmabt32/kmatt32 call sites.
+bool sat = false;
+sreg_t mres0 = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 1);
+sreg_t mres1 = (sreg_t)P_SW(RS1, 1) * P_SW(RS2, 0);
+
+WRITE_RD((sat_add<sreg_t, reg_t>(RD, -mres0, mres1, sat)));
+P_SET_OV(sat); \ No newline at end of file
diff --git a/riscv/insns/kmda.h b/riscv/insns/kmda.h
new file mode 100644
index 0000000..ceba4ac
--- /dev/null
+++ b/riscv/insns/kmda.h
@@ -0,0 +1,3 @@
+P_REDUCTION_LOOP(32, 16, false, true, {
+ pd_res += ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/kmda32.h b/riscv/insns/kmda32.h
new file mode 100644
index 0000000..6233284
--- /dev/null
+++ b/riscv/insns/kmda32.h
@@ -0,0 +1,9 @@
+// kmda32 (RV64 only): P_SW(RS1,0)*P_SW(RS2,0) + P_SW(RS1,1)*P_SW(RS2,1),
+// summed by sat_add (RD is not an input); the flag it reports is forwarded
+// to P_SET_OV.
+require_rv64;
+require_extension('P');
+
+// Initialised to match the kmabb32/kmabt32/kmatt32 call sites.
+bool sat = false;
+sreg_t mres0 = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 0);
+sreg_t mres1 = (sreg_t)P_SW(RS1, 1) * P_SW(RS2, 1);
+
+WRITE_RD((sat_add<sreg_t, reg_t>(mres0, mres1, sat)));
+P_SET_OV(sat); \ No newline at end of file
diff --git a/riscv/insns/kmmac.h b/riscv/insns/kmmac.h
new file mode 100644
index 0000000..124771a
--- /dev/null
+++ b/riscv/insns/kmmac.h
@@ -0,0 +1,6 @@
+P_LOOP(32, {
+ int64_t mres = (int64_t) ps1 * (int64_t) ps2;
+ bool sat = false;
+ pd = (sat_add<int32_t, uint32_t>(pd, (mres >> 32), sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmac_u.h b/riscv/insns/kmmac_u.h
new file mode 100644
index 0000000..9a0c580
--- /dev/null
+++ b/riscv/insns/kmmac_u.h
@@ -0,0 +1,7 @@
+P_LOOP(32, {
+ int64_t mres = (int64_t) ps1 * (int64_t) ps2;
+ int32_t round = (((mres >> 31) + 1) >> 1);
+ bool sat = false;
+ pd = (sat_add<int32_t, uint32_t>(pd, round, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmawb.h b/riscv/insns/kmmawb.h
new file mode 100644
index 0000000..28eb0a8
--- /dev/null
+++ b/riscv/insns/kmmawb.h
@@ -0,0 +1,6 @@
+P_LOOP(32, {
+ int64_t mres = (int64_t)ps1 * P_SH(ps2, 0);
+ bool sat = false;
+ pd = (sat_add<int32_t, uint32_t>(pd, (mres >> 16), sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmawb2.h b/riscv/insns/kmmawb2.h
new file mode 100644
index 0000000..e82bfda
--- /dev/null
+++ b/riscv/insns/kmmawb2.h
@@ -0,0 +1,14 @@
+P_LOOP(32, {
+ int64_t addop = 0;
+ int64_t mres = 0;
+ bool sat = false;
+ if((INT32_MIN != ps1) | (INT16_MIN != P_SH(ps2, 0))) {
+ mres = ((int64_t) ps1 * P_SH(ps2, 0)) << 1;
+ addop = mres >> 16;
+ } else {
+ addop = INT32_MAX;
+ P_SET_OV(1);
+ }
+ pd = (sat_add<int32_t, uint32_t>(pd, addop, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmawb2_u.h b/riscv/insns/kmmawb2_u.h
new file mode 100644
index 0000000..fb4b075
--- /dev/null
+++ b/riscv/insns/kmmawb2_u.h
@@ -0,0 +1,14 @@
+P_LOOP(32, {
+ int64_t addop = 0;
+ int64_t mres = 0;
+ bool sat = false;
+ if((INT32_MIN != ps1) | (INT16_MIN != P_SH(ps2, 0))) {
+ mres = ((int64_t) ps1 * P_SH(ps2, 0)) << 1;
+ addop = ((mres >> 15) + 1) >> 1;
+ } else {
+ addop = INT32_MAX;
+ P_SET_OV(1);
+ }
+ pd = (sat_add<int32_t, uint32_t>(pd, addop, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmawb_u.h b/riscv/insns/kmmawb_u.h
new file mode 100644
index 0000000..4c51b74
--- /dev/null
+++ b/riscv/insns/kmmawb_u.h
@@ -0,0 +1,7 @@
+P_LOOP(32, {
+ int64_t mres = (int64_t)ps1 * P_SH(ps2, 0);
+ int32_t round = (((mres >> 15) + 1) >> 1);
+ bool sat = false;
+ pd = (sat_add<int32_t, uint32_t>(pd, round, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmawt.h b/riscv/insns/kmmawt.h
new file mode 100644
index 0000000..444546f
--- /dev/null
+++ b/riscv/insns/kmmawt.h
@@ -0,0 +1,6 @@
+P_LOOP(32, {
+ int64_t mres = (int64_t)ps1 * P_SH(ps2, 1);
+ bool sat = false;
+ pd = (sat_add<int32_t, uint32_t>(pd, (mres >> 16), sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmawt2.h b/riscv/insns/kmmawt2.h
new file mode 100644
index 0000000..c92d8c7
--- /dev/null
+++ b/riscv/insns/kmmawt2.h
@@ -0,0 +1,14 @@
+P_LOOP(32, {
+ int64_t addop = 0;
+ int64_t mres = 0;
+ bool sat = false;
+ if((INT32_MIN != ps1) | (INT16_MIN != P_SH(ps2, 1))) {
+ mres = ((int64_t) ps1 * P_SH(ps2, 1)) << 1;
+ addop = mres >> 16;
+ } else {
+ addop = INT32_MAX;
+ P_SET_OV(1);
+ }
+ pd = (sat_add<int32_t, uint32_t>(pd, addop, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmawt2_u.h b/riscv/insns/kmmawt2_u.h
new file mode 100644
index 0000000..46dca79
--- /dev/null
+++ b/riscv/insns/kmmawt2_u.h
@@ -0,0 +1,14 @@
+P_LOOP(32, {
+ int64_t addop = 0;
+ int64_t mres = 0;
+ bool sat = false;
+ if((INT32_MIN != ps1) | (INT16_MIN != P_SH(ps2, 1))) {
+ mres = ((int64_t) ps1 * P_SH(ps2, 1)) << 1;
+ addop = ((mres >> 15) + 1) >> 1;
+ } else {
+ addop = INT32_MAX;
+ P_SET_OV(1);
+ }
+ pd = (sat_add<int32_t, uint32_t>(pd, addop, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmawt_u.h b/riscv/insns/kmmawt_u.h
new file mode 100644
index 0000000..fe1dff0
--- /dev/null
+++ b/riscv/insns/kmmawt_u.h
@@ -0,0 +1,7 @@
+P_LOOP(32, {
+ int64_t mres = (int64_t)ps1 * P_SH(ps2, 1);
+ int32_t round = (((mres >> 15) + 1) >> 1);
+ bool sat = false;
+ pd = (sat_add<int32_t, uint32_t>(pd, round, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmsb.h b/riscv/insns/kmmsb.h
new file mode 100644
index 0000000..f1b4c8b
--- /dev/null
+++ b/riscv/insns/kmmsb.h
@@ -0,0 +1,6 @@
+P_LOOP(32, {
+ int64_t mres = (int64_t) ps1 * (int64_t) ps2;
+ bool sat = false;
+ pd = (sat_sub<int32_t, uint32_t>(pd, (mres >> 32), sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmsb_u.h b/riscv/insns/kmmsb_u.h
new file mode 100644
index 0000000..864c66c
--- /dev/null
+++ b/riscv/insns/kmmsb_u.h
@@ -0,0 +1,7 @@
+P_LOOP(32, {
+ int64_t mres = (int64_t) ps1 * (int64_t) ps2;
+ int32_t round = (((mres >> 31) + 1) >> 1);
+ bool sat = false;
+ pd = (sat_sub<int32_t, uint32_t>(pd, round, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmwb2.h b/riscv/insns/kmmwb2.h
new file mode 100644
index 0000000..8574aa5
--- /dev/null
+++ b/riscv/insns/kmmwb2.h
@@ -0,0 +1,9 @@
+P_LOOP(32, {
+ if((INT32_MIN != ps1) | (INT16_MIN != P_SH(ps2, 0))) {
+ int64_t mres = ((int64_t) ps1 * P_SH(ps2, 0)) << 1;
+ pd = mres >> 16;
+ } else {
+ pd = INT32_MAX;
+ P_SET_OV(1);
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmwb2_u.h b/riscv/insns/kmmwb2_u.h
new file mode 100644
index 0000000..4216ad1
--- /dev/null
+++ b/riscv/insns/kmmwb2_u.h
@@ -0,0 +1,9 @@
+P_LOOP(32, {
+ if((INT32_MIN != ps1) | (INT16_MIN != P_SH(ps2, 0))) {
+ int64_t mres = ((int64_t) ps1 * P_SH(ps2, 0)) << 1;
+ pd = ((mres >> 15) + 1) >> 1;
+ } else {
+ pd = INT32_MAX;
+ P_SET_OV(1);
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmwt2.h b/riscv/insns/kmmwt2.h
new file mode 100644
index 0000000..62b47cd
--- /dev/null
+++ b/riscv/insns/kmmwt2.h
@@ -0,0 +1,9 @@
+P_LOOP(32, {
+ if((INT32_MIN != ps1) | (INT16_MIN != P_SH(ps2, 1))) {
+ int64_t mres = ((int64_t) ps1 * P_SH(ps2, 1)) << 1;
+ pd = mres >> 16;
+ } else {
+ pd = INT32_MAX;
+ P_SET_OV(1);
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kmmwt2_u.h b/riscv/insns/kmmwt2_u.h
new file mode 100644
index 0000000..d76d890
--- /dev/null
+++ b/riscv/insns/kmmwt2_u.h
@@ -0,0 +1,9 @@
+P_LOOP(32, {
+ if((INT32_MIN != ps1) | (INT16_MIN != P_SH(ps2, 1))) {
+ int64_t mres = ((int64_t) ps1 * P_SH(ps2, 1)) << 1;
+ pd = ((mres >> 15) + 1) >> 1;
+ } else {
+ pd = INT32_MAX;
+ P_SET_OV(1);
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kmsda.h b/riscv/insns/kmsda.h
new file mode 100644
index 0000000..4992ac3
--- /dev/null
+++ b/riscv/insns/kmsda.h
@@ -0,0 +1,3 @@
+P_REDUCTION_LOOP(32, 16, true, true, {
+ pd_res -= ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/kmsda32.h b/riscv/insns/kmsda32.h
new file mode 100644
index 0000000..1a3a13b
--- /dev/null
+++ b/riscv/insns/kmsda32.h
@@ -0,0 +1,9 @@
+// kmsda32 (RV64 only): RD - P_SW(RS1,0)*P_SW(RS2,0) - P_SW(RS1,1)*P_SW(RS2,1),
+// combined by sat_add; the flag it reports is forwarded to P_SET_OV.
+require_rv64;
+require_extension('P');
+
+// Initialised to match the kmabb32/kmabt32/kmatt32 call sites.
+bool sat = false;
+sreg_t mres0 = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 0);
+sreg_t mres1 = (sreg_t)P_SW(RS1, 1) * P_SW(RS2, 1);
+
+WRITE_RD((sat_add<sreg_t, reg_t>(RD, -mres0, -mres1, sat)));
+P_SET_OV(sat); \ No newline at end of file
diff --git a/riscv/insns/kmsr64.h b/riscv/insns/kmsr64.h
new file mode 100644
index 0000000..bdc405e
--- /dev/null
+++ b/riscv/insns/kmsr64.h
@@ -0,0 +1,25 @@
+P_64_PROFILE_BASE()
+P_64_PROFILE_PARAM(true, false)
+
+bool sat = false;
+sreg_t mres0 = -(sreg_t)P_SW(rs1, 0) * P_SW(rs2, 0);
+sreg_t mres1 = -(sreg_t)P_SW(rs1, 1) * P_SW(rs2, 1);
+sreg_t res;
+
+if (xlen == 32) {
+ rd = (sat_add<int64_t, uint64_t>(rd, mres0, sat));
+} else {
+ if ((rd ^ mres0) < 0) {
+ res = rd + mres0;
+ rd = (sat_add<int64_t, uint64_t>(res, mres1, sat));
+ } else if ((rd ^ mres1) < 0) {
+ res = rd + mres1;
+ rd = (sat_add<int64_t, uint64_t>(res, mres0, sat));
+ } else {
+ rd = (sat_add<int64_t, uint64_t>(rd, mres0, sat));
+ P_SET_OV(sat);
+ rd = (sat_add<int64_t, uint64_t>(rd, mres1, sat));
+ }
+}
+P_SET_OV(sat);
+P_64_PROFILE_END() \ No newline at end of file
diff --git a/riscv/insns/kmsxda.h b/riscv/insns/kmsxda.h
new file mode 100644
index 0000000..34b2b1d
--- /dev/null
+++ b/riscv/insns/kmsxda.h
@@ -0,0 +1,3 @@
+P_REDUCTION_CROSS_LOOP(32, 16, true, true, {
+ pd_res -= ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/kmsxda32.h b/riscv/insns/kmsxda32.h
new file mode 100644
index 0000000..8bea0c2
--- /dev/null
+++ b/riscv/insns/kmsxda32.h
@@ -0,0 +1,9 @@
+// kmsxda32 (RV64 only): RD - P_SW(RS1,0)*P_SW(RS2,1) - P_SW(RS1,1)*P_SW(RS2,0),
+// combined by sat_add; the flag it reports is forwarded to P_SET_OV.
+require_rv64;
+require_extension('P');
+
+// Initialised to match the kmabb32/kmabt32/kmatt32 call sites.
+bool sat = false;
+sreg_t mres0 = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 1);
+sreg_t mres1 = (sreg_t)P_SW(RS1, 1) * P_SW(RS2, 0);
+
+WRITE_RD((sat_add<sreg_t, reg_t>(RD, -mres0, -mres1, sat)));
+P_SET_OV(sat); \ No newline at end of file
diff --git a/riscv/insns/kmxda.h b/riscv/insns/kmxda.h
new file mode 100644
index 0000000..0f6fb03
--- /dev/null
+++ b/riscv/insns/kmxda.h
@@ -0,0 +1,3 @@
+P_REDUCTION_CROSS_LOOP(32, 16, false, true, {
+ pd_res += ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/kmxda32.h b/riscv/insns/kmxda32.h
new file mode 100644
index 0000000..d33cdf1
--- /dev/null
+++ b/riscv/insns/kmxda32.h
@@ -0,0 +1,9 @@
+// kmxda32 (RV64 only): P_SW(RS1,0)*P_SW(RS2,1) + P_SW(RS1,1)*P_SW(RS2,0),
+// summed by sat_add (RD is not an input); the flag it reports is forwarded
+// to P_SET_OV.
+require_rv64;
+require_extension('P');
+
+// Initialised to match the kmabb32/kmabt32/kmatt32 call sites.
+bool sat = false;
+sreg_t mres0 = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 1);
+sreg_t mres1 = (sreg_t)P_SW(RS1, 1) * P_SW(RS2, 0);
+
+WRITE_RD((sat_add<sreg_t, reg_t>(mres0, mres1, sat)));
+P_SET_OV(sat); \ No newline at end of file
diff --git a/riscv/insns/ksll16.h b/riscv/insns/ksll16.h
new file mode 100644
index 0000000..405967a
--- /dev/null
+++ b/riscv/insns/ksll16.h
@@ -0,0 +1,5 @@
+P_X_LOOP(16, 4, {
+ auto res = (sreg_t)ps1 << sa;
+ P_SAT(res, 16);
+ pd = res;
+}) \ No newline at end of file
diff --git a/riscv/insns/ksll32.h b/riscv/insns/ksll32.h
new file mode 100644
index 0000000..4533119
--- /dev/null
+++ b/riscv/insns/ksll32.h
@@ -0,0 +1,6 @@
+require_rv64;
+P_X_LOOP(32, 5, {
+ auto res = (sreg_t)ps1 << sa;
+ P_SAT(res, 32);
+ pd = res;
+}) \ No newline at end of file
diff --git a/riscv/insns/ksll8.h b/riscv/insns/ksll8.h
new file mode 100644
index 0000000..482ceb7
--- /dev/null
+++ b/riscv/insns/ksll8.h
@@ -0,0 +1,5 @@
+P_X_LOOP(8, 3, {
+ auto res = (sreg_t)ps1 << sa;
+ P_SAT(res, 8);
+ pd = res;
+}) \ No newline at end of file
diff --git a/riscv/insns/kslli16.h b/riscv/insns/kslli16.h
new file mode 100644
index 0000000..dd8c1de
--- /dev/null
+++ b/riscv/insns/kslli16.h
@@ -0,0 +1,5 @@
+P_I_LOOP(16, 4, {
+ auto res = (sreg_t)ps1 << imm4u;
+ P_SAT(res, 16);
+ pd = res;
+}) \ No newline at end of file
diff --git a/riscv/insns/kslli32.h b/riscv/insns/kslli32.h
new file mode 100644
index 0000000..4c034bd
--- /dev/null
+++ b/riscv/insns/kslli32.h
@@ -0,0 +1,6 @@
+require_rv64;
+P_I_LOOP(32, 5, {
+ auto res = (sreg_t)ps1 << imm5u;
+ P_SAT(res, 32);
+ pd = res;
+}) \ No newline at end of file
diff --git a/riscv/insns/kslli8.h b/riscv/insns/kslli8.h
new file mode 100644
index 0000000..cc8b08d
--- /dev/null
+++ b/riscv/insns/kslli8.h
@@ -0,0 +1,5 @@
+P_I_LOOP(8, 3, {
+ auto res = (sreg_t)ps1 << imm3u;
+ P_SAT(res, 8);
+ pd = res;
+}) \ No newline at end of file
diff --git a/riscv/insns/kslliw.h b/riscv/insns/kslliw.h
new file mode 100644
index 0000000..9c1d877
--- /dev/null
+++ b/riscv/insns/kslliw.h
@@ -0,0 +1,7 @@
+require_extension('P');
+sreg_t rs1 = sext32(RS1);
+sreg_t sa = insn.p_imm5();
+sreg_t res = rs1 << sa;
+
+P_SAT(res, 32);
+WRITE_RD(sext32(res)); \ No newline at end of file
diff --git a/riscv/insns/ksllw.h b/riscv/insns/ksllw.h
new file mode 100644
index 0000000..b95a6f2
--- /dev/null
+++ b/riscv/insns/ksllw.h
@@ -0,0 +1,7 @@
+require_extension('P');
+sreg_t rs1 = sext32(RS1);
+sreg_t sa = get_field(RS2, make_mask64(0, 5));
+sreg_t res = rs1 << sa;
+
+P_SAT(res, 32);
+WRITE_RD(sext32(res)); \ No newline at end of file
diff --git a/riscv/insns/kslra16.h b/riscv/insns/kslra16.h
new file mode 100644
index 0000000..93ccec3
--- /dev/null
+++ b/riscv/insns/kslra16.h
@@ -0,0 +1,11 @@
+P_X_LOOP(16, 5, {
+ if (ssa < 0) {
+ sa = -ssa;
+ sa = (sa == 16) ? 15 : sa;
+ pd = ps1 >> sa;
+ } else {
+ auto res = (sreg_t)ps1 << ssa;
+ P_SAT(res, 16);
+ pd = res;
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kslra16_u.h b/riscv/insns/kslra16_u.h
new file mode 100644
index 0000000..ac6b2ff
--- /dev/null
+++ b/riscv/insns/kslra16_u.h
@@ -0,0 +1,14 @@
+P_X_LOOP(16, 5, {
+ if (ssa < 0) {
+ sa = -ssa;
+ sa = (sa == 16) ? 15 : sa;
+ if(sa != 0)
+ pd = ((ps1 >> (sa - 1)) + 1) >> 1;
+ else
+ pd = ps1;
+ } else {
+ auto res = (sreg_t)ps1 << ssa;
+ P_SAT(res, 16);
+ pd = res;
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kslra32.h b/riscv/insns/kslra32.h
new file mode 100644
index 0000000..34ffbe4
--- /dev/null
+++ b/riscv/insns/kslra32.h
@@ -0,0 +1,12 @@
+require_rv64;
+P_X_LOOP(32, 6, {
+ if (ssa < 0) {
+ sa = -ssa;
+ sa = (sa == 32) ? 31 : sa;
+ pd = ps1 >> sa;
+ } else {
+ auto res = (sreg_t)ps1 << ssa;
+ P_SAT(res, 32);
+ pd = res;
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kslra32_u.h b/riscv/insns/kslra32_u.h
new file mode 100644
index 0000000..8fe632d
--- /dev/null
+++ b/riscv/insns/kslra32_u.h
@@ -0,0 +1,15 @@
+require_rv64;
+P_X_LOOP(32, 6, {
+ if (ssa < 0) {
+ sa = -ssa;
+ sa = (sa == 32) ? 31 : sa;
+ if(sa != 0)
+ pd = ((ps1 >> (sa - 1)) + 1) >> 1;
+ else
+ pd = ps1;
+ } else {
+ auto res = (sreg_t)ps1 << ssa;
+ P_SAT(res, 32);
+ pd = res;
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kslra8.h b/riscv/insns/kslra8.h
new file mode 100644
index 0000000..8ed631d
--- /dev/null
+++ b/riscv/insns/kslra8.h
@@ -0,0 +1,11 @@
+P_X_LOOP(8, 4, {
+ if (ssa < 0) {
+ sa = -ssa;
+ sa = (sa == 8) ? 7 : sa;
+ pd = ps1 >> sa;
+ } else {
+ auto res = (sreg_t)ps1 << ssa;
+ P_SAT(res, 8);
+ pd = res;
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kslra8_u.h b/riscv/insns/kslra8_u.h
new file mode 100644
index 0000000..9139775
--- /dev/null
+++ b/riscv/insns/kslra8_u.h
@@ -0,0 +1,14 @@
+P_X_LOOP(8, 4, {
+ if (ssa < 0) {
+ sa = -ssa;
+ sa = (sa == 8) ? 7 : sa;
+ if(sa != 0)
+ pd = ((ps1 >> (sa - 1)) + 1) >> 1;
+ else
+ pd = ps1;
+ } else {
+ auto res = (sreg_t)ps1 << ssa;
+ P_SAT(res, 8);
+ pd = res;
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/kslraw.h b/riscv/insns/kslraw.h
new file mode 100644
index 0000000..36e1ea9
--- /dev/null
+++ b/riscv/insns/kslraw.h
@@ -0,0 +1,13 @@
+require_extension('P');
+sreg_t rs1 = sext32(RS1);
+sreg_t sa = int64_t(RS2) << (64 - 6) >> (64 - 6);
+
+if (sa < 0) {
+ sa = -sa;
+ sa = (sa == 32) ? 31 : sa;
+ WRITE_RD(sext32(rs1 >> sa));
+} else {
+ auto res = rs1 << sa;
+ P_SAT(res, 32);
+ WRITE_RD(sext32(res));
+} \ No newline at end of file
diff --git a/riscv/insns/kslraw_u.h b/riscv/insns/kslraw_u.h
new file mode 100644
index 0000000..3ff723d
--- /dev/null
+++ b/riscv/insns/kslraw_u.h
@@ -0,0 +1,13 @@
+// kslraw.u: shifts sext32(RS1) by the signed amount held in the low 6 bits
+// of RS2. Negative amounts shift right with rounding; non-negative amounts
+// shift left and pass the result through P_SAT(res, 32).
+require_extension('P');
+sreg_t rs1 = sext32(RS1);
+// Sign-extend RS2[5:0] into a 64-bit shift amount.
+sreg_t sa = int64_t(RS2) << (64 - 6) >> (64 - 6);
+
+if (sa < 0) {
+ sa = -sa;
+ // A right shift by 32 is clamped to 31.
+ sa = (sa == 32) ? 31 : sa;
+ // Add the rounding bit, drop it with the final >> 1, and only then narrow
+ // with sext32. Narrowing before the last shift corrupted the result when
+ // the +1 carried into bit 31 (e.g. rs1 = 0x7fffffff shifted right by one).
+ WRITE_RD(sext32(((rs1 >> (sa - 1)) + 1) >> 1));
+} else {
+ auto res = rs1 << sa;
+ P_SAT(res, 32);
+ WRITE_RD(sext32(res));
+} \ No newline at end of file
diff --git a/riscv/insns/kstas16.h b/riscv/insns/kstas16.h
new file mode 100644
index 0000000..50d3a46
--- /dev/null
+++ b/riscv/insns/kstas16.h
@@ -0,0 +1,9 @@
+P_STRAIGHT_ULOOP(16, {
+ bool sat = false;
+ pd = (sat_add<int16_t, uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}, {
+ bool sat = false;
+ pd = (sat_sub<int16_t, uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kstas32.h b/riscv/insns/kstas32.h
new file mode 100644
index 0000000..aea3c46
--- /dev/null
+++ b/riscv/insns/kstas32.h
@@ -0,0 +1,10 @@
+require_rv64;
+P_STRAIGHT_ULOOP(32, {
+ bool sat = false;
+ pd = (sat_add<int32_t, uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}, {
+ bool sat = false;
+ pd = (sat_sub<int32_t, uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kstsa16.h b/riscv/insns/kstsa16.h
new file mode 100644
index 0000000..76b1f22
--- /dev/null
+++ b/riscv/insns/kstsa16.h
@@ -0,0 +1,9 @@
+P_STRAIGHT_ULOOP(16, {
+ bool sat = false;
+ pd = (sat_sub<int16_t, uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}, {
+ bool sat = false;
+ pd = (sat_add<int16_t, uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/kstsa32.h b/riscv/insns/kstsa32.h
new file mode 100644
index 0000000..d2ac99b
--- /dev/null
+++ b/riscv/insns/kstsa32.h
@@ -0,0 +1,10 @@
+require_rv64;
+P_STRAIGHT_ULOOP(32, {
+ bool sat = false;
+ pd = (sat_sub<int32_t, uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}, {
+ bool sat = false;
+ pd = (sat_add<int32_t, uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ksub16.h b/riscv/insns/ksub16.h
new file mode 100644
index 0000000..9994359
--- /dev/null
+++ b/riscv/insns/ksub16.h
@@ -0,0 +1,5 @@
+P_LOOP(16, {
+ bool sat = false;
+ pd = (sat_sub<int16_t, uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ksub32.h b/riscv/insns/ksub32.h
new file mode 100644
index 0000000..3e51440
--- /dev/null
+++ b/riscv/insns/ksub32.h
@@ -0,0 +1,6 @@
+require_rv64;
+P_LOOP(32, {
+ bool sat = false;
+ pd = (sat_sub<int32_t, uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ksub64.h b/riscv/insns/ksub64.h
new file mode 100644
index 0000000..c94c28e
--- /dev/null
+++ b/riscv/insns/ksub64.h
@@ -0,0 +1,5 @@
+P_64_PROFILE({
+ bool sat = false;
+ rd = (sat_sub<int64_t, uint64_t>(rs1, rs2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ksub8.h b/riscv/insns/ksub8.h
new file mode 100644
index 0000000..90a0c58
--- /dev/null
+++ b/riscv/insns/ksub8.h
@@ -0,0 +1,5 @@
+P_LOOP(8, {
+ bool sat = false;
+ pd = (sat_sub<int8_t, uint8_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ksubh.h b/riscv/insns/ksubh.h
new file mode 100644
index 0000000..9c21a94
--- /dev/null
+++ b/riscv/insns/ksubh.h
@@ -0,0 +1,4 @@
+// ksubh: subtracts P_SW(RS2, 0) from P_SW(RS1, 0) at sreg_t width, applies
+// P_SAT(res, 16), and writes the low 16 bits back sign-extended to xlen.
+require_extension('P');
+const sreg_t minuend = (sreg_t)P_SW(RS1, 0);
+const sreg_t subtrahend = (sreg_t)P_SW(RS2, 0);
+sreg_t res = minuend - subtrahend;
+P_SAT(res, 16);
+WRITE_RD(sext_xlen((int16_t)res)); \ No newline at end of file
diff --git a/riscv/insns/ksubw.h b/riscv/insns/ksubw.h
new file mode 100644
index 0000000..0542ba1
--- /dev/null
+++ b/riscv/insns/ksubw.h
@@ -0,0 +1,4 @@
+// ksubw: subtracts P_SW(RS2, 0) from P_SW(RS1, 0) at sreg_t width, applies
+// P_SAT(res, 32), and writes sext32(res) to RD.
+require_extension('P');
+const sreg_t minuend = (sreg_t)P_SW(RS1, 0);
+const sreg_t subtrahend = (sreg_t)P_SW(RS2, 0);
+sreg_t res = minuend - subtrahend;
+P_SAT(res, 32);
+WRITE_RD(sext32(res)); \ No newline at end of file
diff --git a/riscv/insns/kwmmul.h b/riscv/insns/kwmmul.h
new file mode 100644
index 0000000..0332769
--- /dev/null
+++ b/riscv/insns/kwmmul.h
@@ -0,0 +1,9 @@
+// kwmmul: for 32-bit lanes, pd = bits [63:32] of the doubled 64-bit product
+// of ps1 and ps2. When both operands equal INT32_MIN the lane is set to
+// INT32_MAX and P_SET_OV(1) is invoked instead.
+P_LOOP(32, {
+  // Logical ||: both operands are boolean comparisons.
+  if ((INT32_MIN != ps1) || (INT32_MIN != ps2)) {
+    // "* 2" rather than "<< 1": left-shifting a negative value is undefined
+    // before C++20. With 32-bit operands (assumed from the lane width) the
+    // product magnitude is below 2^62 on this path, so doubling fits.
+    int64_t mres = ((int64_t) ps1 * (int64_t) ps2) * 2;
+    pd = mres >> 32;
+  } else {
+    pd = INT32_MAX;
+    P_SET_OV(1);
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/kwmmul_u.h b/riscv/insns/kwmmul_u.h
new file mode 100644
index 0000000..73ba2c7
--- /dev/null
+++ b/riscv/insns/kwmmul_u.h
@@ -0,0 +1,9 @@
+// kwmmul.u: like the truncating form but rounded: the doubled 64-bit product
+// is shifted right by 31, incremented, and shifted once more. When both
+// operands equal INT32_MIN the lane is set to INT32_MAX and P_SET_OV(1) is
+// invoked instead.
+P_LOOP(32, {
+  // Logical ||: both operands are boolean comparisons.
+  if ((INT32_MIN != ps1) || (INT32_MIN != ps2)) {
+    // "* 2" rather than "<< 1": left-shifting a negative value is undefined
+    // before C++20. With 32-bit operands (assumed from the lane width) the
+    // product magnitude is below 2^62 on this path, so doubling fits.
+    int64_t mres = ((int64_t) ps1 * (int64_t) ps2) * 2;
+    pd = ((mres >> 31) + 1) >> 1;
+  } else {
+    pd = INT32_MAX;
+    P_SET_OV(1);
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/maddr32.h b/riscv/insns/maddr32.h
new file mode 100644
index 0000000..47cadd3
--- /dev/null
+++ b/riscv/insns/maddr32.h
@@ -0,0 +1,5 @@
+// maddr32: multiplies P_W(RS1, 0) by P_W(RS2, 0) at reg_t width, adds
+// P_W(RD, 0), and writes the low 32 bits of the sum sign-extended to xlen.
+require_extension('P');
+const reg_t product = (reg_t)P_W(RS1, 0) * P_W(RS2, 0);
+const reg_t sum = (reg_t)P_W(RD, 0) + product;
+WRITE_RD(sext_xlen((int32_t)sum)); \ No newline at end of file
diff --git a/riscv/insns/maxw.h b/riscv/insns/maxw.h
new file mode 100644
index 0000000..7fc9404
--- /dev/null
+++ b/riscv/insns/maxw.h
@@ -0,0 +1,4 @@
+// maxw: writes the larger of the two int32_t values read via P_W(RS1, 0) and
+// P_W(RS2, 0), sign-extended to xlen.
+require_extension('P');
+const int32_t lhs = P_W(RS1, 0);
+const int32_t rhs = P_W(RS2, 0);
+WRITE_RD(sext_xlen(lhs < rhs ? rhs : lhs)); \ No newline at end of file
diff --git a/riscv/insns/minw.h b/riscv/insns/minw.h
new file mode 100644
index 0000000..1c00c63
--- /dev/null
+++ b/riscv/insns/minw.h
@@ -0,0 +1,4 @@
+// minw: writes the smaller of the two int32_t values read via P_W(RS1, 0)
+// and P_W(RS2, 0), sign-extended to xlen.
+require_extension('P');
+const int32_t lhs = P_W(RS1, 0);
+const int32_t rhs = P_W(RS2, 0);
+WRITE_RD(sext_xlen(lhs < rhs ? lhs : rhs)); \ No newline at end of file
diff --git a/riscv/insns/msubr32.h b/riscv/insns/msubr32.h
new file mode 100644
index 0000000..14dabb8
--- /dev/null
+++ b/riscv/insns/msubr32.h
@@ -0,0 +1,5 @@
+// msubr32: multiplies P_W(RS1, 0) by P_W(RS2, 0) at reg_t width, subtracts
+// the product from P_W(RD, 0), and writes the low 32 bits of the difference
+// sign-extended to xlen.
+require_extension('P');
+const reg_t product = (reg_t)P_W(RS1, 0) * P_W(RS2, 0);
+const reg_t difference = (reg_t)P_W(RD, 0) - product;
+WRITE_RD(sext_xlen((int32_t)difference)); \ No newline at end of file
diff --git a/riscv/insns/mulr64.h b/riscv/insns/mulr64.h
new file mode 100644
index 0000000..d2f807a
--- /dev/null
+++ b/riscv/insns/mulr64.h
@@ -0,0 +1,3 @@
+require_extension('P'); // mulr64: check the 'P' extension before doing any work
+reg_t rd = (reg_t)P_W(RS1, 0) * P_W(RS2, 0); // first operand is cast to reg_t so the multiply happens at reg_t width; rd is not referenced again in this file — presumably P_64_PROFILE_END() writes it back (confirm)
+P_64_PROFILE_END(); \ No newline at end of file
diff --git a/riscv/insns/mulsr64.h b/riscv/insns/mulsr64.h
new file mode 100644
index 0000000..2cc0e17
--- /dev/null
+++ b/riscv/insns/mulsr64.h
@@ -0,0 +1,3 @@
+require_extension('P'); // mulsr64: check the 'P' extension before doing any work
+sreg_t rd = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 0); // first operand is cast to sreg_t so the multiply happens at sreg_t width; rd is not referenced again in this file — presumably P_64_PROFILE_END() writes it back (confirm)
+P_64_PROFILE_END(); \ No newline at end of file
diff --git a/riscv/insns/pbsad.h b/riscv/insns/pbsad.h
new file mode 100644
index 0000000..71f5aeb
--- /dev/null
+++ b/riscv/insns/pbsad.h
@@ -0,0 +1,3 @@
+// pbsad: adds |ps1 - ps2| of each 8-bit lane into pd_res. The subtraction is
+// always ordered larger-minus-smaller so it never goes negative.
+// ps1/ps2/pd_res are supplied by P_REDUCTION_ULOOP.
+P_REDUCTION_ULOOP(64, 8, false, false, {
+  if (ps1 > ps2)
+    pd_res += ps1 - ps2;
+  else
+    pd_res += ps2 - ps1;
+}) \ No newline at end of file
diff --git a/riscv/insns/pbsada.h b/riscv/insns/pbsada.h
new file mode 100644
index 0000000..a8e1f46
--- /dev/null
+++ b/riscv/insns/pbsada.h
@@ -0,0 +1,3 @@
+// pbsada: adds |ps1 - ps2| of each 8-bit lane into pd_res. Differs from
+// pbsad only in the third macro argument (true here); presumably that seeds
+// pd_res from the destination — confirm in P_REDUCTION_ULOOP.
+P_REDUCTION_ULOOP(64, 8, true, false, {
+  if (ps1 > ps2)
+    pd_res += ps1 - ps2;
+  else
+    pd_res += ps2 - ps1;
+}) \ No newline at end of file
diff --git a/riscv/insns/pkbb16.h b/riscv/insns/pkbb16.h
new file mode 100644
index 0000000..06283cf
--- /dev/null
+++ b/riscv/insns/pkbb16.h
@@ -0,0 +1 @@
+P_PK(16, 0, 0); /* pkbb16: whole instruction is P_PK with width 16 and selectors (0, 0); presumably "bottom, bottom" per the mnemonic - confirm against P_PK */ \ No newline at end of file
diff --git a/riscv/insns/pkbb32.h b/riscv/insns/pkbb32.h
new file mode 100644
index 0000000..2b7ab37
--- /dev/null
+++ b/riscv/insns/pkbb32.h
@@ -0,0 +1,2 @@
+require_rv64; // pkbb32: RV64 guard, then P_PK with width 32 and selectors (0, 0); presumably "bottom, bottom" per the mnemonic — confirm against P_PK
+P_PK(32, 0, 0); \ No newline at end of file
diff --git a/riscv/insns/pkbt16.h b/riscv/insns/pkbt16.h
new file mode 100644
index 0000000..8247388
--- /dev/null
+++ b/riscv/insns/pkbt16.h
@@ -0,0 +1 @@
+P_PK(16, 0, 1); /* pkbt16: whole instruction is P_PK with width 16 and selectors (0, 1); presumably "bottom, top" per the mnemonic - confirm against P_PK */ \ No newline at end of file
diff --git a/riscv/insns/pkbt32.h b/riscv/insns/pkbt32.h
new file mode 100644
index 0000000..426dcdc
--- /dev/null
+++ b/riscv/insns/pkbt32.h
@@ -0,0 +1,2 @@
+require_rv64; // pkbt32: RV64 guard, then P_PK with width 32 and selectors (0, 1); presumably "bottom, top" per the mnemonic — confirm against P_PK
+P_PK(32, 0, 1); \ No newline at end of file
diff --git a/riscv/insns/pktb16.h b/riscv/insns/pktb16.h
new file mode 100644
index 0000000..2925eae
--- /dev/null
+++ b/riscv/insns/pktb16.h
@@ -0,0 +1 @@
+P_PK(16, 1, 0); /* pktb16: whole instruction is P_PK with width 16 and selectors (1, 0); presumably "top, bottom" per the mnemonic - confirm against P_PK */ \ No newline at end of file
diff --git a/riscv/insns/pktb32.h b/riscv/insns/pktb32.h
new file mode 100644
index 0000000..ff98609
--- /dev/null
+++ b/riscv/insns/pktb32.h
@@ -0,0 +1,2 @@
+require_rv64; // pktb32: RV64 guard, then P_PK with width 32 and selectors (1, 0); presumably "top, bottom" per the mnemonic — confirm against P_PK
+P_PK(32, 1, 0); \ No newline at end of file
diff --git a/riscv/insns/pktt16.h b/riscv/insns/pktt16.h
new file mode 100644
index 0000000..11cca59
--- /dev/null
+++ b/riscv/insns/pktt16.h
@@ -0,0 +1 @@
+P_PK(16, 1, 1); /* pktt16: whole instruction is P_PK with width 16 and selectors (1, 1); presumably "top, top" per the mnemonic - confirm against P_PK */ \ No newline at end of file
diff --git a/riscv/insns/pktt32.h b/riscv/insns/pktt32.h
new file mode 100644
index 0000000..49d86ee
--- /dev/null
+++ b/riscv/insns/pktt32.h
@@ -0,0 +1,2 @@
+require_rv64; // pktt32: RV64 guard, then P_PK with width 32 and selectors (1, 1); presumably "top, top" per the mnemonic — confirm against P_PK
+P_PK(32, 1, 1); \ No newline at end of file
diff --git a/riscv/insns/radd16.h b/riscv/insns/radd16.h
new file mode 100644
index 0000000..4195516
--- /dev/null
+++ b/riscv/insns/radd16.h
@@ -0,0 +1,3 @@
+// radd16: halving add on 16-bit lanes, pd = (ps1 + ps2) >> 1.
+// ps1/ps2/pd are supplied by P_LOOP.
+P_LOOP(16, {
+  const auto sum = ps1 + ps2;
+  pd = sum >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/radd32.h b/riscv/insns/radd32.h
new file mode 100644
index 0000000..ec908e5
--- /dev/null
+++ b/riscv/insns/radd32.h
@@ -0,0 +1,4 @@
+// radd32: guarded by require_rv64. Halving add on 32-bit lanes; ps1 is
+// widened to int64_t first so the sum keeps its carry before the shift.
+require_rv64;
+P_LOOP(32, {
+  const auto sum = (int64_t)ps1 + ps2;
+  pd = sum >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/radd64.h b/riscv/insns/radd64.h
new file mode 100644
index 0000000..91c6c24
--- /dev/null
+++ b/riscv/insns/radd64.h
@@ -0,0 +1,8 @@
+P_64_PROFILE({ // radd64: halving add of rs1 and rs2 into rd; all three names are supplied by P_64_PROFILE
+ rd = (rs1 + rs2) >> 1; // NOTE(review): the sum is formed at operand width, so bit 63 of the shifted value can be wrong on carry-out; the branches below repair it. Relies on wrap-around — confirm operand types
+ if (rs1 > 0 && rs2 > 0) { // both positive: the halved sum must be non-negative
+ rd &= ~((reg_t)1 << 63); // clear bit 63
+ } else if (rs1 < 0 && rs2 < 0) { // both negative: the halved sum must be negative
+ rd |= ((reg_t)1 << 63); // set bit 63
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/radd8.h b/riscv/insns/radd8.h
new file mode 100644
index 0000000..5ac638b
--- /dev/null
+++ b/riscv/insns/radd8.h
@@ -0,0 +1,3 @@
+// radd8: halving add on 8-bit lanes, pd = (ps1 + ps2) >> 1.
+// ps1/ps2/pd are supplied by P_LOOP.
+P_LOOP(8, {
+  const auto sum = ps1 + ps2;
+  pd = sum >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/raddw.h b/riscv/insns/raddw.h
new file mode 100644
index 0000000..983f33e
--- /dev/null
+++ b/riscv/insns/raddw.h
@@ -0,0 +1,4 @@
+// raddw: adds P_SW(RS1, 0) and P_SW(RS2, 0) at sreg_t width, shifts the sum
+// right by one, and writes it sign-extended to xlen.
+require_extension('P');
+const sreg_t sum = (sreg_t)P_SW(RS1, 0) + (sreg_t)P_SW(RS2, 0);
+const sreg_t halved = sum >> 1;
+WRITE_RD(sext_xlen(halved)); \ No newline at end of file
diff --git a/riscv/insns/rcras16.h b/riscv/insns/rcras16.h
new file mode 100644
index 0000000..6d91eb8
--- /dev/null
+++ b/riscv/insns/rcras16.h
@@ -0,0 +1,5 @@
+// rcras16: two bodies for P_CROSS_LOOP on 16-bit lanes. The first is a
+// halving add, the second a halving subtract. Which lane each body applies
+// to, and how ps2 is crossed, is decided by the macro.
+P_CROSS_LOOP(16, {
+  const auto sum = ps1 + ps2;
+  pd = sum >> 1;
+}, {
+  const auto diff = ps1 - ps2;
+  pd = diff >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/rcras32.h b/riscv/insns/rcras32.h
new file mode 100644
index 0000000..e9329cc
--- /dev/null
+++ b/riscv/insns/rcras32.h
@@ -0,0 +1,6 @@
+// rcras32: guarded by require_rv64. Two bodies for P_CROSS_LOOP on 32-bit
+// lanes: halving add, then halving subtract. ps1 is widened to int64_t so
+// the intermediate keeps its extra bit before the shift.
+require_rv64;
+P_CROSS_LOOP(32, {
+  const auto sum = (int64_t)ps1 + ps2;
+  pd = sum >> 1;
+}, {
+  const auto diff = (int64_t)ps1 - ps2;
+  pd = diff >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/rcrsa16.h b/riscv/insns/rcrsa16.h
new file mode 100644
index 0000000..a3a74f0
--- /dev/null
+++ b/riscv/insns/rcrsa16.h
@@ -0,0 +1,5 @@
+// rcrsa16: two bodies for P_CROSS_LOOP on 16-bit lanes. The first is a
+// halving subtract, the second a halving add. Which lane each body applies
+// to, and how ps2 is crossed, is decided by the macro.
+P_CROSS_LOOP(16, {
+  const auto diff = ps1 - ps2;
+  pd = diff >> 1;
+}, {
+  const auto sum = ps1 + ps2;
+  pd = sum >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/rcrsa32.h b/riscv/insns/rcrsa32.h
new file mode 100644
index 0000000..76cddbc
--- /dev/null
+++ b/riscv/insns/rcrsa32.h
@@ -0,0 +1,6 @@
+// rcrsa32: guarded by require_rv64. Two bodies for P_CROSS_LOOP on 32-bit
+// lanes: halving subtract, then halving add.
+// The intermediate is widened to int64_t (not uint64_t) so the shift is an
+// arithmetic shift of a signed value, matching the other signed 32-bit
+// halving bodies. The bits that land in a 32-bit pd are the same either way;
+// the signed form states the intent and stays correct if pd is ever wider.
+require_rv64;
+P_CROSS_LOOP(32, {
+  pd = ((int64_t)ps1 - ps2) >> 1;
+}, {
+  pd = ((int64_t)ps1 + ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/rstas16.h b/riscv/insns/rstas16.h
new file mode 100644
index 0000000..cf38442
--- /dev/null
+++ b/riscv/insns/rstas16.h
@@ -0,0 +1,5 @@
+// rstas16: two bodies for P_STRAIGHT_LOOP on 16-bit lanes. The first is a
+// halving add, the second a halving subtract. Which lane each body applies
+// to is decided by the macro.
+P_STRAIGHT_LOOP(16, {
+  const auto sum = ps1 + ps2;
+  pd = sum >> 1;
+}, {
+  const auto diff = ps1 - ps2;
+  pd = diff >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/rstas32.h b/riscv/insns/rstas32.h
new file mode 100644
index 0000000..f23603f
--- /dev/null
+++ b/riscv/insns/rstas32.h
@@ -0,0 +1,6 @@
+// rstas32: guarded by require_rv64. Two bodies for P_STRAIGHT_LOOP on 32-bit
+// lanes: halving add, then halving subtract, each computed at int64_t width
+// before the shift.
+require_rv64;
+P_STRAIGHT_LOOP(32, {
+  const auto sum = (int64_t)ps1 + ps2;
+  pd = sum >> 1;
+}, {
+  const auto diff = (int64_t)ps1 - ps2;
+  pd = diff >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/rstsa16.h b/riscv/insns/rstsa16.h
new file mode 100644
index 0000000..8c0476e
--- /dev/null
+++ b/riscv/insns/rstsa16.h
@@ -0,0 +1,5 @@
+// rstsa16: two bodies for P_STRAIGHT_LOOP on 16-bit lanes. The first is a
+// halving subtract, the second a halving add. Which lane each body applies
+// to is decided by the macro.
+P_STRAIGHT_LOOP(16, {
+  const auto diff = ps1 - ps2;
+  pd = diff >> 1;
+}, {
+  const auto sum = ps1 + ps2;
+  pd = sum >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/rstsa32.h b/riscv/insns/rstsa32.h
new file mode 100644
index 0000000..ee74dab
--- /dev/null
+++ b/riscv/insns/rstsa32.h
@@ -0,0 +1,6 @@
+// rstsa32: guarded by require_rv64. Two bodies for P_STRAIGHT_LOOP on 32-bit
+// lanes: halving subtract, then halving add, each computed at int64_t width
+// before the shift.
+require_rv64;
+P_STRAIGHT_LOOP(32, {
+  const auto diff = (int64_t)ps1 - ps2;
+  pd = diff >> 1;
+}, {
+  const auto sum = (int64_t)ps1 + ps2;
+  pd = sum >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/rsub16.h b/riscv/insns/rsub16.h
new file mode 100644
index 0000000..27e420d
--- /dev/null
+++ b/riscv/insns/rsub16.h
@@ -0,0 +1,3 @@
+// rsub16: halving subtract on 16-bit lanes, pd = (ps1 - ps2) >> 1.
+// ps1/ps2/pd are supplied by P_LOOP.
+P_LOOP(16, {
+  const auto diff = ps1 - ps2;
+  pd = diff >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/rsub32.h b/riscv/insns/rsub32.h
new file mode 100644
index 0000000..aaf7862
--- /dev/null
+++ b/riscv/insns/rsub32.h
@@ -0,0 +1,4 @@
+// rsub32: guarded by require_rv64. Halving subtract on 32-bit lanes; ps1 is
+// widened to int64_t first so the difference keeps its extra bit before the
+// shift.
+require_rv64;
+P_LOOP(32, {
+  const auto diff = (int64_t)ps1 - ps2;
+  pd = diff >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/rsub64.h b/riscv/insns/rsub64.h
new file mode 100644
index 0000000..a42e9c1
--- /dev/null
+++ b/riscv/insns/rsub64.h
@@ -0,0 +1,8 @@
+// rsub64: halving subtract of rs2 from rs1 into rd; all three names are
+// supplied by P_64_PROFILE. The difference is formed at operand width, so
+// when it needs 65 bits the shifted value has the wrong bit 63; the branches
+// below repair the sign from the operand signs. This relies on wrap-around
+// arithmetic — confirm the operand types in P_64_PROFILE.
+P_64_PROFILE({
+  rd = (rs1 - rs2) >> 1;
+  // rs1 >= 0 (not > 0): 0 - INT64_MIN also overflows and must be repaired.
+  // For every non-negative rs1 and negative rs2 the true result is positive,
+  // so clearing bit 63 is always correct on this path.
+  if (rs1 >= 0 && rs2 < 0) {
+    rd &= ~((reg_t)1 << 63);
+  } else if (rs1 < 0 && rs2 > 0) {
+    // Negative minus positive is always negative.
+    rd |= ((reg_t)1 << 63);
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/rsub8.h b/riscv/insns/rsub8.h
new file mode 100644
index 0000000..a3bfeb7
--- /dev/null
+++ b/riscv/insns/rsub8.h
@@ -0,0 +1,3 @@
+// rsub8: halving subtract on 8-bit lanes, pd = (ps1 - ps2) >> 1.
+// ps1/ps2/pd are supplied by P_LOOP.
+P_LOOP(8, {
+  const auto diff = ps1 - ps2;
+  pd = diff >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/rsubw.h b/riscv/insns/rsubw.h
new file mode 100644
index 0000000..89083ad
--- /dev/null
+++ b/riscv/insns/rsubw.h
@@ -0,0 +1,4 @@
+// rsubw: subtracts P_SW(RS2, 0) from P_SW(RS1, 0) at sreg_t width, shifts
+// the difference right by one, and writes it sign-extended to xlen.
+require_extension('P');
+const sreg_t diff = (sreg_t)P_SW(RS1, 0) - (sreg_t)P_SW(RS2, 0);
+const sreg_t halved = diff >> 1;
+WRITE_RD(sext_xlen(halved)); \ No newline at end of file
diff --git a/riscv/insns/sclip16.h b/riscv/insns/sclip16.h
new file mode 100644
index 0000000..6d0e60c
--- /dev/null
+++ b/riscv/insns/sclip16.h
@@ -0,0 +1,13 @@
+// sclip16: clamps each 16-bit lane to [lower, upper], where the bounds are
+// INT64_MIN / INT64_MAX shifted right by 64 - (imm4u + 1). P_SET_OV(1) is
+// invoked whenever a lane is clamped; in-range lanes are copied unchanged.
+// ps1/pd/imm4u are supplied by P_I_LOOP.
+P_I_LOOP(16, 4, {
+  const int64_t upper = INT64_MAX >> (64 - (imm4u + 1));
+  const int64_t lower = INT64_MIN >> (64 - (imm4u + 1));
+
+  if (ps1 > upper) {
+    pd = upper;
+    P_SET_OV(1);
+  } else if (ps1 < lower) {
+    pd = lower;
+    P_SET_OV(1);
+  } else {
+    pd = ps1;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/sclip32.h b/riscv/insns/sclip32.h
new file mode 100644
index 0000000..0d7793b
--- /dev/null
+++ b/riscv/insns/sclip32.h
@@ -0,0 +1,13 @@
+// sclip32: clamps each 32-bit lane to [lower, upper], where the bounds are
+// INT64_MIN / INT64_MAX shifted right by 64 - (imm5u + 1). P_SET_OV(1) is
+// invoked whenever a lane is clamped; in-range lanes are copied unchanged.
+// ps1/pd/imm5u are supplied by P_I_LOOP.
+P_I_LOOP(32, 5, {
+  const int64_t upper = INT64_MAX >> (64 - (imm5u + 1));
+  const int64_t lower = INT64_MIN >> (64 - (imm5u + 1));
+
+  if (ps1 > upper) {
+    pd = upper;
+    P_SET_OV(1);
+  } else if (ps1 < lower) {
+    pd = lower;
+    P_SET_OV(1);
+  } else {
+    pd = ps1;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/sclip8.h b/riscv/insns/sclip8.h
new file mode 100644
index 0000000..f95770f
--- /dev/null
+++ b/riscv/insns/sclip8.h
@@ -0,0 +1,13 @@
+// sclip8: clamps each 8-bit lane to [lower, upper], where the bounds are
+// INT64_MIN / INT64_MAX shifted right by 64 - (imm3u + 1). P_SET_OV(1) is
+// invoked whenever a lane is clamped; in-range lanes are copied unchanged.
+// ps1/pd/imm3u are supplied by P_I_LOOP.
+P_I_LOOP(8, 3, {
+  const int64_t upper = INT64_MAX >> (64 - (imm3u + 1));
+  const int64_t lower = INT64_MIN >> (64 - (imm3u + 1));
+
+  if (ps1 > upper) {
+    pd = upper;
+    P_SET_OV(1);
+  } else if (ps1 < lower) {
+    pd = lower;
+    P_SET_OV(1);
+  } else {
+    pd = ps1;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/scmple16.h b/riscv/insns/scmple16.h
new file mode 100644
index 0000000..6ee2e15
--- /dev/null
+++ b/riscv/insns/scmple16.h
@@ -0,0 +1,3 @@
+// scmple16: 16-bit lane compare; pd becomes -1 (all ones) when ps1 <= ps2
+// and 0 otherwise.
+P_LOOP(16, {
+  if (ps1 <= ps2)
+    pd = -1;
+  else
+    pd = 0;
+}) \ No newline at end of file
diff --git a/riscv/insns/scmple8.h b/riscv/insns/scmple8.h
new file mode 100644
index 0000000..1540cb1
--- /dev/null
+++ b/riscv/insns/scmple8.h
@@ -0,0 +1,3 @@
+// scmple8: 8-bit lane compare; pd becomes -1 (all ones) when ps1 <= ps2
+// and 0 otherwise.
+P_LOOP(8, {
+  if (ps1 <= ps2)
+    pd = -1;
+  else
+    pd = 0;
+}) \ No newline at end of file
diff --git a/riscv/insns/scmplt16.h b/riscv/insns/scmplt16.h
new file mode 100644
index 0000000..ad34ae0
--- /dev/null
+++ b/riscv/insns/scmplt16.h
@@ -0,0 +1,3 @@
+// scmplt16: 16-bit lane compare; pd becomes -1 (all ones) when ps1 < ps2
+// and 0 otherwise.
+P_LOOP(16, {
+  if (ps1 < ps2)
+    pd = -1;
+  else
+    pd = 0;
+}) \ No newline at end of file
diff --git a/riscv/insns/scmplt8.h b/riscv/insns/scmplt8.h
new file mode 100644
index 0000000..df17dff
--- /dev/null
+++ b/riscv/insns/scmplt8.h
@@ -0,0 +1,3 @@
+// scmplt8: 8-bit lane compare; pd becomes -1 (all ones) when ps1 < ps2
+// and 0 otherwise.
+P_LOOP(8, {
+  if (ps1 < ps2)
+    pd = -1;
+  else
+    pd = 0;
+}) \ No newline at end of file
diff --git a/riscv/insns/sll16.h b/riscv/insns/sll16.h
new file mode 100644
index 0000000..66b81eb
--- /dev/null
+++ b/riscv/insns/sll16.h
@@ -0,0 +1,3 @@
+P_X_ULOOP(16, 4, { // sll16: body handed to P_X_ULOOP (arguments 16 and 4); ps1, sa and pd are supplied by the macro
+ pd = ps1 << sa; // left shift of the lane by sa
+}) \ No newline at end of file
diff --git a/riscv/insns/sll32.h b/riscv/insns/sll32.h
new file mode 100644
index 0000000..b19be32
--- /dev/null
+++ b/riscv/insns/sll32.h
@@ -0,0 +1,4 @@
+require_rv64; // sll32: RV64 guard
+P_X_ULOOP(32, 5, { // body handed to P_X_ULOOP (arguments 32 and 5); ps1, sa and pd are supplied by the macro
+ pd = ps1 << sa; // left shift of the lane by sa
+}) \ No newline at end of file
diff --git a/riscv/insns/sll8.h b/riscv/insns/sll8.h
new file mode 100644
index 0000000..7a36b3c
--- /dev/null
+++ b/riscv/insns/sll8.h
@@ -0,0 +1,3 @@
+P_X_ULOOP(8, 3, { // sll8: body handed to P_X_ULOOP (arguments 8 and 3); ps1, sa and pd are supplied by the macro
+ pd = ps1 << sa; // left shift of the lane by sa
+}) \ No newline at end of file
diff --git a/riscv/insns/slli16.h b/riscv/insns/slli16.h
new file mode 100644
index 0000000..64827d7
--- /dev/null
+++ b/riscv/insns/slli16.h
@@ -0,0 +1,3 @@
+P_I_ULOOP(16, 4, { // slli16: body handed to P_I_ULOOP (arguments 16 and 4); ps1, imm4u and pd are supplied by the macro
+ pd = ps1 << imm4u; // left shift of the lane by the immediate
+}) \ No newline at end of file
diff --git a/riscv/insns/slli32.h b/riscv/insns/slli32.h
new file mode 100644
index 0000000..5278586
--- /dev/null
+++ b/riscv/insns/slli32.h
@@ -0,0 +1,4 @@
+require_rv64; // slli32: RV64 guard
+P_I_ULOOP(32, 5, { // body handed to P_I_ULOOP (arguments 32 and 5); ps1, imm5u and pd are supplied by the macro
+ pd = ps1 << imm5u; // left shift of the lane by the immediate
+}) \ No newline at end of file
diff --git a/riscv/insns/slli8.h b/riscv/insns/slli8.h
new file mode 100644
index 0000000..4e3935c
--- /dev/null
+++ b/riscv/insns/slli8.h
@@ -0,0 +1,3 @@
+P_I_ULOOP(8, 3, { // slli8: body handed to P_I_ULOOP (arguments 8 and 3); ps1, imm3u and pd are supplied by the macro
+ pd = ps1 << imm3u; // left shift of the lane by the immediate
+}) \ No newline at end of file
diff --git a/riscv/insns/smal.h b/riscv/insns/smal.h
new file mode 100644
index 0000000..121e4dc
--- /dev/null
+++ b/riscv/insns/smal.h
@@ -0,0 +1,10 @@
+// smal: adds products of RS2 halfwords (read via P_SH) to a 64-bit operand.
+//   xlen == 32: operand is RS1_PAIR, one product (halfword 0 * halfword 1),
+//               result is written with WRITE_RD_PAIR.
+//   otherwise : operand is RS1, two products (halfwords 0*1 and 2*3),
+//               result is written with WRITE_RD.
+// The extension check is explicit because this file uses no P_* wrapper
+// macro; without it the non-32-bit path reaches WRITE_RD with no check that
+// the P extension is enabled.
+require_extension('P');
+sreg_t res = 0;
+if (xlen == 32) {
+  res = RS1_PAIR;
+  res += sext_xlen(P_SH(RS2, 0) * P_SH(RS2, 1));
+  WRITE_RD_PAIR(res);
+} else {
+  res = sext_xlen(P_SH(RS2, 0) * P_SH(RS2, 1)) +
+        sext_xlen(P_SH(RS2, 2) * P_SH(RS2, 3)) + RS1;
+  WRITE_RD(res);
+} \ No newline at end of file
diff --git a/riscv/insns/smalbb.h b/riscv/insns/smalbb.h
new file mode 100644
index 0000000..4178c2b
--- /dev/null
+++ b/riscv/insns/smalbb.h
@@ -0,0 +1,3 @@
+// smalbb: accumulates into rd the product of halfword 0 of ps1 and halfword 0
+// of ps2, both widened to sreg_t before multiplying. ps1/ps2/rd are supplied
+// by P_64_PROFILE_REDUCTION.
+P_64_PROFILE_REDUCTION(32, {
+  const sreg_t lhs = (sreg_t)P_SH(ps1, 0);
+  const sreg_t rhs = (sreg_t)P_SH(ps2, 0);
+  rd += lhs * rhs;
+}) \ No newline at end of file
diff --git a/riscv/insns/smalbt.h b/riscv/insns/smalbt.h
new file mode 100644
index 0000000..dab9f7a
--- /dev/null
+++ b/riscv/insns/smalbt.h
@@ -0,0 +1,3 @@
+// smalbt: accumulates into rd the product of halfword 0 of ps1 and halfword 1
+// of ps2, both widened to sreg_t before multiplying. ps1/ps2/rd are supplied
+// by P_64_PROFILE_REDUCTION.
+P_64_PROFILE_REDUCTION(32, {
+  const sreg_t lhs = (sreg_t)P_SH(ps1, 0);
+  const sreg_t rhs = (sreg_t)P_SH(ps2, 1);
+  rd += lhs * rhs;
+}) \ No newline at end of file
diff --git a/riscv/insns/smalda.h b/riscv/insns/smalda.h
new file mode 100644
index 0000000..95b16b1
--- /dev/null
+++ b/riscv/insns/smalda.h
@@ -0,0 +1,3 @@
+// smalda: accumulates ps1 * ps2 of every 16-bit lane into rd.
+// ps1/ps2/rd are supplied by P_64_PROFILE_REDUCTION.
+P_64_PROFILE_REDUCTION(16, {
+  const auto product = ps1 * ps2;
+  rd += product;
+}) \ No newline at end of file
diff --git a/riscv/insns/smaldrs.h b/riscv/insns/smaldrs.h
new file mode 100644
index 0000000..8ce8c9b
--- /dev/null
+++ b/riscv/insns/smaldrs.h
@@ -0,0 +1,7 @@
+// smaldrs: 16-bit lane products are added to rd for even lane index i and
+// subtracted for odd i. i/ps1/ps2/rd are supplied by P_64_PROFILE_REDUCTION.
+P_64_PROFILE_REDUCTION(16, {
+  const auto product = ps1 * ps2;
+  if (i & 1) {
+    rd -= product;
+  } else {
+    rd += product;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/smalds.h b/riscv/insns/smalds.h
new file mode 100644
index 0000000..352fa1f
--- /dev/null
+++ b/riscv/insns/smalds.h
@@ -0,0 +1,7 @@
+// smalds: 16-bit lane products are added to rd for odd lane index i and
+// subtracted for even i. i/ps1/ps2/rd are supplied by P_64_PROFILE_REDUCTION.
+P_64_PROFILE_REDUCTION(16, {
+  const auto product = ps1 * ps2;
+  if (i & 1) {
+    rd += product;
+  } else {
+    rd -= product;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/smaltt.h b/riscv/insns/smaltt.h
new file mode 100644
index 0000000..72d5133
--- /dev/null
+++ b/riscv/insns/smaltt.h
@@ -0,0 +1,3 @@
+// smaltt: accumulates into rd the product of halfword 1 of ps1 and halfword 1
+// of ps2. ps1/ps2/rd are supplied by P_64_PROFILE_REDUCTION.
+P_64_PROFILE_REDUCTION(32, {
+  const auto product = P_SH(ps1, 1) * P_SH(ps2, 1);
+  rd += product;
+}) \ No newline at end of file
diff --git a/riscv/insns/smalxda.h b/riscv/insns/smalxda.h
new file mode 100644
index 0000000..b5fa1d0
--- /dev/null
+++ b/riscv/insns/smalxda.h
@@ -0,0 +1,4 @@
+// smalxda: accumulates both crossed halfword products into rd:
+// ps1.h0 * ps2.h1 and ps1.h1 * ps2.h0, each formed at sreg_t width.
+P_64_PROFILE_REDUCTION(32, {
+  const sreg_t low_by_high = (sreg_t)P_SH(ps1, 0) * (sreg_t)P_SH(ps2, 1);
+  const sreg_t high_by_low = (sreg_t)P_SH(ps1, 1) * (sreg_t)P_SH(ps2, 0);
+  rd += low_by_high;
+  rd += high_by_low;
+}) \ No newline at end of file
diff --git a/riscv/insns/smalxds.h b/riscv/insns/smalxds.h
new file mode 100644
index 0000000..3f3c6bd
--- /dev/null
+++ b/riscv/insns/smalxds.h
@@ -0,0 +1,4 @@
+// smalxds: adds ps1.h1 * ps2.h0 to rd and subtracts ps1.h0 * ps2.h1, each
+// product formed at sreg_t width.
+P_64_PROFILE_REDUCTION(32, {
+  const sreg_t high_by_low = (sreg_t)P_SH(ps1, 1) * (sreg_t)P_SH(ps2, 0);
+  const sreg_t low_by_high = (sreg_t)P_SH(ps1, 0) * (sreg_t)P_SH(ps2, 1);
+  rd += high_by_low;
+  rd -= low_by_high;
+}) \ No newline at end of file
diff --git a/riscv/insns/smaqa.h b/riscv/insns/smaqa.h
new file mode 100644
index 0000000..33c9df0
--- /dev/null
+++ b/riscv/insns/smaqa.h
@@ -0,0 +1,3 @@
+// smaqa: accumulates ps1 * ps2 of each 8-bit lane into pd_res.
+// ps1/ps2/pd_res are supplied by P_REDUCTION_LOOP.
+P_REDUCTION_LOOP(32, 8, true, false, {
+  const auto product = ps1 * ps2;
+  pd_res += product;
+}) \ No newline at end of file
diff --git a/riscv/insns/smaqa_su.h b/riscv/insns/smaqa_su.h
new file mode 100644
index 0000000..7af3386
--- /dev/null
+++ b/riscv/insns/smaqa_su.h
@@ -0,0 +1,3 @@
+// smaqa.su: accumulates ps1 * ps2 of each 8-bit lane into pd_res. The
+// signedness of ps1 and ps2 is set by P_REDUCTION_SULOOP (presumably signed
+// times unsigned, per the mnemonic — confirm).
+P_REDUCTION_SULOOP(32, 8, true, false, {
+  const auto product = ps1 * ps2;
+  pd_res += product;
+}) \ No newline at end of file
diff --git a/riscv/insns/smar64.h b/riscv/insns/smar64.h
new file mode 100644
index 0000000..bc87d4f
--- /dev/null
+++ b/riscv/insns/smar64.h
@@ -0,0 +1,3 @@
+// smar64: accumulates ps1 * ps2 of each 32-bit lane into rd.
+// NOTE(review): the product is formed at the declared type of ps1/ps2. This
+// is only exact if P_64_PROFILE_REDUCTION declares them wider than 32 bits —
+// confirm in the macro definition.
+P_64_PROFILE_REDUCTION(32, {
+  const auto product = ps1 * ps2;
+  rd += product;
+}) \ No newline at end of file
diff --git a/riscv/insns/smax16.h b/riscv/insns/smax16.h
new file mode 100644
index 0000000..eac4eb4
--- /dev/null
+++ b/riscv/insns/smax16.h
@@ -0,0 +1,3 @@
+// smax16: 16-bit lane maximum; pd takes ps1 when ps1 > ps2, else ps2.
+P_LOOP(16, {
+  if (ps1 > ps2)
+    pd = ps1;
+  else
+    pd = ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/smax32.h b/riscv/insns/smax32.h
new file mode 100644
index 0000000..b0b0075
--- /dev/null
+++ b/riscv/insns/smax32.h
@@ -0,0 +1,3 @@
+// smax32: 32-bit lane maximum; pd takes ps1 when ps1 > ps2, else ps2.
+// Guarded by require_rv64 like the other SIMD 32-bit lane instructions: the
+// P-extension proposal lists SMAX32 among the RV64-only SIMD 32-bit
+// miscellaneous instructions (review against the spec table).
+require_rv64;
+P_LOOP(32, {
+  pd = (ps1 > ps2) ? ps1 : ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/smax8.h b/riscv/insns/smax8.h
new file mode 100644
index 0000000..86303c7
--- /dev/null
+++ b/riscv/insns/smax8.h
@@ -0,0 +1,3 @@
+// smax8: 8-bit lane maximum; pd takes ps1 when ps1 > ps2, else ps2.
+P_LOOP(8, {
+  if (ps1 > ps2)
+    pd = ps1;
+  else
+    pd = ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/smbb16.h b/riscv/insns/smbb16.h
new file mode 100644
index 0000000..d7e82b4
--- /dev/null
+++ b/riscv/insns/smbb16.h
@@ -0,0 +1,3 @@
+// smbb16: in each 32-bit lane, pd = halfword 0 of ps1 times halfword 0 of
+// ps2 (both read via P_SH).
+P_LOOP(32, {
+  const auto product = P_SH(ps1, 0) * P_SH(ps2, 0);
+  pd = product;
+}) \ No newline at end of file
diff --git a/riscv/insns/smbt16.h b/riscv/insns/smbt16.h
new file mode 100644
index 0000000..55e90c6
--- /dev/null
+++ b/riscv/insns/smbt16.h
@@ -0,0 +1,3 @@
+// smbt16: in each 32-bit lane, pd = halfword 0 of ps1 times halfword 1 of
+// ps2 (both read via P_SH).
+P_LOOP(32, {
+  const auto product = P_SH(ps1, 0) * P_SH(ps2, 1);
+  pd = product;
+}) \ No newline at end of file
diff --git a/riscv/insns/smbt32.h b/riscv/insns/smbt32.h
new file mode 100644
index 0000000..1d16755
--- /dev/null
+++ b/riscv/insns/smbt32.h
@@ -0,0 +1,3 @@
+// smbt32: RV64-only. Writes the sreg_t-wide product of word 0 of RS1 and
+// word 1 of RS2 (both read via P_SW) to RD.
+require_rv64;
+require_extension('P');
+const sreg_t product = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 1);
+WRITE_RD(product); \ No newline at end of file
diff --git a/riscv/insns/smdrs.h b/riscv/insns/smdrs.h
new file mode 100644
index 0000000..2837a0c
--- /dev/null
+++ b/riscv/insns/smdrs.h
@@ -0,0 +1,6 @@
+// smdrs: within each 32-bit group, 16-bit lane products are subtracted from
+// pd_res for odd lane index j and added for even j.
+P_REDUCTION_LOOP(32, 16, false, false, {
+  const auto product = ps1 * ps2;
+  if (j & 1)
+    pd_res -= product;
+  else
+    pd_res += product;
+}) \ No newline at end of file
diff --git a/riscv/insns/smdrs32.h b/riscv/insns/smdrs32.h
new file mode 100644
index 0000000..53ac516
--- /dev/null
+++ b/riscv/insns/smdrs32.h
@@ -0,0 +1,7 @@
+// smdrs32: RV64-only. RD = RS1.w0 * RS2.w0 - RS1.w1 * RS2.w1, with each
+// product formed at sreg_t width.
+require_rv64;
+require_extension('P');
+
+const sreg_t low_product = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 0);
+const sreg_t high_product = (sreg_t)P_SW(RS1, 1) * P_SW(RS2, 1);
+
+WRITE_RD(low_product - high_product); \ No newline at end of file
diff --git a/riscv/insns/smds.h b/riscv/insns/smds.h
new file mode 100644
index 0000000..214ac34
--- /dev/null
+++ b/riscv/insns/smds.h
@@ -0,0 +1,6 @@
+// smds: within each 32-bit group, 16-bit lane products are added to pd_res
+// for odd lane index j and subtracted for even j.
+P_REDUCTION_LOOP(32, 16, false, false, {
+  const auto product = ps1 * ps2;
+  if (j & 1)
+    pd_res += product;
+  else
+    pd_res -= product;
+}) \ No newline at end of file
diff --git a/riscv/insns/smds32.h b/riscv/insns/smds32.h
new file mode 100644
index 0000000..97440a5
--- /dev/null
+++ b/riscv/insns/smds32.h
@@ -0,0 +1,7 @@
+// smds32: RV64-only. RD = RS1.w1 * RS2.w1 - RS1.w0 * RS2.w0, with each
+// product formed at sreg_t width.
+require_rv64;
+require_extension('P');
+
+const sreg_t low_product = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 0);
+const sreg_t high_product = (sreg_t)P_SW(RS1, 1) * P_SW(RS2, 1);
+
+WRITE_RD(high_product - low_product); \ No newline at end of file
diff --git a/riscv/insns/smin16.h b/riscv/insns/smin16.h
new file mode 100644
index 0000000..921cccb
--- /dev/null
+++ b/riscv/insns/smin16.h
@@ -0,0 +1,3 @@
+// smin16: 16-bit lane minimum; pd takes ps1 when ps1 < ps2, else ps2.
+P_LOOP(16, {
+  if (ps1 < ps2)
+    pd = ps1;
+  else
+    pd = ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/smin32.h b/riscv/insns/smin32.h
new file mode 100644
index 0000000..c1d0cfa
--- /dev/null
+++ b/riscv/insns/smin32.h
@@ -0,0 +1,3 @@
+// smin32: 32-bit lane minimum; pd takes ps1 when ps1 < ps2, else ps2.
+// Guarded by require_rv64 like the other SIMD 32-bit lane instructions: the
+// P-extension proposal lists SMIN32 among the RV64-only SIMD 32-bit
+// miscellaneous instructions (review against the spec table).
+require_rv64;
+P_LOOP(32, {
+  pd = (ps1 < ps2) ? ps1 : ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/smin8.h b/riscv/insns/smin8.h
new file mode 100644
index 0000000..04bd43a
--- /dev/null
+++ b/riscv/insns/smin8.h
@@ -0,0 +1,3 @@
+// smin8: 8-bit lane minimum; pd takes ps1 when ps1 < ps2, else ps2.
+P_LOOP(8, {
+  if (ps1 < ps2)
+    pd = ps1;
+  else
+    pd = ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/smmul.h b/riscv/insns/smmul.h
new file mode 100644
index 0000000..c305648
--- /dev/null
+++ b/riscv/insns/smmul.h
@@ -0,0 +1,4 @@
+// smmul: in each 32-bit lane, pd = bits [63:32] of the 64-bit product of ps1
+// and ps2 (truncating).
+P_LOOP(32, {
+  const int64_t product = (int64_t) ps1 * (int64_t) ps2;
+  pd = product >> 32;
+}) \ No newline at end of file
diff --git a/riscv/insns/smmul_u.h b/riscv/insns/smmul_u.h
new file mode 100644
index 0000000..a41ceb7
--- /dev/null
+++ b/riscv/insns/smmul_u.h
@@ -0,0 +1,4 @@
+// smmul.u: in each 32-bit lane, the 64-bit product of ps1 and ps2 is shifted
+// right by 31, incremented, and shifted once more, so the high word is
+// rounded on the last dropped bit.
+P_LOOP(32, {
+  const int64_t product = (int64_t) ps1 * (int64_t) ps2;
+  const int64_t with_guard_bit = product >> 31;
+  pd = (with_guard_bit + 1) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/smmwb.h b/riscv/insns/smmwb.h
new file mode 100644
index 0000000..5e798f2
--- /dev/null
+++ b/riscv/insns/smmwb.h
@@ -0,0 +1,4 @@
+// smmwb: in each 32-bit lane, multiplies ps1 (as int64_t) by halfword 0 of
+// ps2 (read via P_H, cast to int16_t) and keeps the product shifted right
+// by 16.
+P_LOOP(32, {
+  const int64_t product = (int64_t) ps1 * (int16_t) P_H(ps2, 0);
+  pd = product >> 16;
+}) \ No newline at end of file
diff --git a/riscv/insns/smmwb_u.h b/riscv/insns/smmwb_u.h
new file mode 100644
index 0000000..9ea1e49
--- /dev/null
+++ b/riscv/insns/smmwb_u.h
@@ -0,0 +1,4 @@
+// smmwb.u: like the truncating form but rounded: the product of ps1 and
+// halfword 0 of ps2 is shifted right by 15, incremented, and shifted once
+// more.
+P_LOOP(32, {
+  const int64_t product = (int64_t) ps1 * (int16_t) P_H(ps2, 0);
+  const int64_t with_guard_bit = product >> 15;
+  pd = (with_guard_bit + 1) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/smmwt.h b/riscv/insns/smmwt.h
new file mode 100644
index 0000000..0f88ce1
--- /dev/null
+++ b/riscv/insns/smmwt.h
@@ -0,0 +1,4 @@
+// smmwt: in each 32-bit lane, multiplies ps1 (as int64_t) by halfword 1 of
+// ps2 (read via P_H, cast to int16_t) and keeps the product shifted right
+// by 16.
+P_LOOP(32, {
+  const int64_t product = (int64_t) ps1 * (int16_t) P_H(ps2, 1);
+  pd = product >> 16;
+}) \ No newline at end of file
diff --git a/riscv/insns/smmwt_u.h b/riscv/insns/smmwt_u.h
new file mode 100644
index 0000000..97e6d64
--- /dev/null
+++ b/riscv/insns/smmwt_u.h
@@ -0,0 +1,4 @@
+// smmwt.u: like the truncating form but rounded: the product of ps1 and
+// halfword 1 of ps2 is shifted right by 15, incremented, and shifted once
+// more.
+P_LOOP(32, {
+  const int64_t product = (int64_t) ps1 * (int16_t) P_H(ps2, 1);
+  const int64_t with_guard_bit = product >> 15;
+  pd = (with_guard_bit + 1) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/smslda.h b/riscv/insns/smslda.h
new file mode 100644
index 0000000..eba5727
--- /dev/null
+++ b/riscv/insns/smslda.h
@@ -0,0 +1,3 @@
+// smslda: subtracts ps1 * ps2 of every 16-bit lane from rd.
+// ps1/ps2/rd are supplied by P_64_PROFILE_REDUCTION.
+P_64_PROFILE_REDUCTION(16, {
+  const auto product = ps1 * ps2;
+  rd -= product;
+}) \ No newline at end of file
diff --git a/riscv/insns/smslxda.h b/riscv/insns/smslxda.h
new file mode 100644
index 0000000..5e2c43d
--- /dev/null
+++ b/riscv/insns/smslxda.h
@@ -0,0 +1,4 @@
+// smslxda: subtracts both crossed halfword products from rd:
+// ps1.h1 * ps2.h0 and ps1.h0 * ps2.h1, each formed at sreg_t width.
+P_64_PROFILE_REDUCTION(32, {
+  const sreg_t high_by_low = (sreg_t)P_SH(ps1, 1) * (sreg_t)P_SH(ps2, 0);
+  const sreg_t low_by_high = (sreg_t)P_SH(ps1, 0) * (sreg_t)P_SH(ps2, 1);
+  rd -= high_by_low;
+  rd -= low_by_high;
+}) \ No newline at end of file
diff --git a/riscv/insns/smsr64.h b/riscv/insns/smsr64.h
new file mode 100644
index 0000000..cc44fc5
--- /dev/null
+++ b/riscv/insns/smsr64.h
@@ -0,0 +1,3 @@
+// smsr64: subtracts ps1 * ps2 of each 32-bit lane from rd.
+// NOTE(review): the product is formed at the declared type of ps1/ps2. This
+// is only exact if P_64_PROFILE_REDUCTION declares them wider than 32 bits —
+// confirm in the macro definition.
+P_64_PROFILE_REDUCTION(32, {
+  const auto product = ps1 * ps2;
+  rd -= product;
+}) \ No newline at end of file
diff --git a/riscv/insns/smtt16.h b/riscv/insns/smtt16.h
new file mode 100644
index 0000000..bbc5039
--- /dev/null
+++ b/riscv/insns/smtt16.h
@@ -0,0 +1,3 @@
+// smtt16: in each 32-bit lane, pd = halfword 1 of ps1 times halfword 1 of
+// ps2 (both read via P_SH).
+P_LOOP(32, {
+  const auto product = P_SH(ps1, 1) * P_SH(ps2, 1);
+  pd = product;
+}) \ No newline at end of file
diff --git a/riscv/insns/smtt32.h b/riscv/insns/smtt32.h
new file mode 100644
index 0000000..beb45d8
--- /dev/null
+++ b/riscv/insns/smtt32.h
@@ -0,0 +1,3 @@
+// smtt32: RV64-only. Writes the sreg_t-wide product of word 1 of RS1 and
+// word 1 of RS2 (both read via P_SW) to RD.
+require_rv64;
+require_extension('P');
+const sreg_t product = (sreg_t)P_SW(RS1, 1) * P_SW(RS2, 1);
+WRITE_RD(product); \ No newline at end of file
diff --git a/riscv/insns/smul16.h b/riscv/insns/smul16.h
new file mode 100644
index 0000000..4ec914b
--- /dev/null
+++ b/riscv/insns/smul16.h
@@ -0,0 +1,3 @@
+P_MUL_LOOP(16, { // smul16: body handed to P_MUL_LOOP with width 16; ps1, ps2 and pd are supplied by the macro
+ pd = ps1 * ps2; // lane product; the width of pd is whatever the macro declares (not visible here)
+}) \ No newline at end of file
diff --git a/riscv/insns/smul8.h b/riscv/insns/smul8.h
new file mode 100644
index 0000000..ee01391
--- /dev/null
+++ b/riscv/insns/smul8.h
@@ -0,0 +1,3 @@
+P_MUL_LOOP(8, { // smul8: body handed to P_MUL_LOOP with width 8; ps1, ps2 and pd are supplied by the macro
+ pd = ps1 * ps2; // lane product; the width of pd is whatever the macro declares (not visible here)
+}) \ No newline at end of file
diff --git a/riscv/insns/smulx16.h b/riscv/insns/smulx16.h
new file mode 100644
index 0000000..623cb06
--- /dev/null
+++ b/riscv/insns/smulx16.h
@@ -0,0 +1,3 @@
+P_MUL_CROSS_LOOP(16, { // smulx16: body handed to P_MUL_CROSS_LOOP with width 16; the macro supplies ps1/ps2/pd and presumably pairs crossed lanes (confirm)
+ pd = ps1 * ps2; // lane product; the width of pd is whatever the macro declares (not visible here)
+}) \ No newline at end of file
diff --git a/riscv/insns/smulx8.h b/riscv/insns/smulx8.h
new file mode 100644
index 0000000..d11614d
--- /dev/null
+++ b/riscv/insns/smulx8.h
@@ -0,0 +1,3 @@
+P_MUL_CROSS_LOOP(8, { // smulx8: body handed to P_MUL_CROSS_LOOP with width 8; the macro supplies ps1/ps2/pd and presumably pairs crossed lanes (confirm)
+ pd = ps1 * ps2; // lane product; the width of pd is whatever the macro declares (not visible here)
+}) \ No newline at end of file
diff --git a/riscv/insns/smxds.h b/riscv/insns/smxds.h
new file mode 100644
index 0000000..49b720b
--- /dev/null
+++ b/riscv/insns/smxds.h
@@ -0,0 +1,6 @@
+// smxds: within each 32-bit group, 16-bit lane products are added to pd_res
+// for odd lane index j and subtracted for even j. How ps2 is crossed against
+// ps1 is decided by P_REDUCTION_CROSS_LOOP.
+P_REDUCTION_CROSS_LOOP(32, 16, false, false, {
+  const auto product = ps1 * ps2;
+  if (j & 1)
+    pd_res += product;
+  else
+    pd_res -= product;
+}) \ No newline at end of file
diff --git a/riscv/insns/smxds32.h b/riscv/insns/smxds32.h
new file mode 100644
index 0000000..25b1666
--- /dev/null
+++ b/riscv/insns/smxds32.h
@@ -0,0 +1,7 @@
+// smxds32: RV64-only. RD = RS1.w1 * RS2.w0 - RS1.w0 * RS2.w1, with each
+// product formed at sreg_t width.
+require_rv64;
+require_extension('P');
+
+const sreg_t low_by_high = (sreg_t)P_SW(RS1, 0) * P_SW(RS2, 1);
+const sreg_t high_by_low = (sreg_t)P_SW(RS1, 1) * P_SW(RS2, 0);
+
+WRITE_RD(high_by_low - low_by_high); \ No newline at end of file
diff --git a/riscv/insns/sra16.h b/riscv/insns/sra16.h
new file mode 100644
index 0000000..bdc9c5f
--- /dev/null
+++ b/riscv/insns/sra16.h
@@ -0,0 +1,3 @@
+P_X_LOOP(16, 4, { // sra16: body handed to P_X_LOOP (arguments 16 and 4); ps1, sa and pd are supplied by the macro
+ pd = ps1 >> sa; // right shift of the lane by sa; arithmetic if ps1 is signed (type set by the macro — confirm)
+}) \ No newline at end of file
diff --git a/riscv/insns/sra16_u.h b/riscv/insns/sra16_u.h
new file mode 100644
index 0000000..44ebbc3
--- /dev/null
+++ b/riscv/insns/sra16_u.h
@@ -0,0 +1,6 @@
+// sra16.u: rounding right shift of each 16-bit lane by sa. The lane is
+// shifted by sa - 1, incremented, then shifted once more; sa == 0 copies the
+// lane unchanged.
+P_X_LOOP(16, 4, {
+  if (sa > 0) {
+    const auto with_guard_bit = ps1 >> (sa - 1);
+    pd = (with_guard_bit + 1) >> 1;
+  } else {
+    pd = ps1;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/sra32.h b/riscv/insns/sra32.h
new file mode 100644
index 0000000..53a8c07
--- /dev/null
+++ b/riscv/insns/sra32.h
@@ -0,0 +1,4 @@
+require_rv64; // sra32: RV64 guard
+P_X_LOOP(32, 5, { // body handed to P_X_LOOP (arguments 32 and 5); ps1, sa and pd are supplied by the macro
+ pd = ps1 >> sa; // right shift of the lane by sa; arithmetic if ps1 is signed (type set by the macro — confirm)
+}) \ No newline at end of file
diff --git a/riscv/insns/sra32_u.h b/riscv/insns/sra32_u.h
new file mode 100644
index 0000000..c0ce87f
--- /dev/null
+++ b/riscv/insns/sra32_u.h
@@ -0,0 +1,7 @@
+// sra32.u: RV64-only. Rounding right shift of each 32-bit lane by sa. The
+// lane is shifted by sa - 1, incremented, then shifted once more; sa == 0
+// copies the lane unchanged.
+require_rv64;
+P_X_LOOP(32, 5, {
+  if (sa > 0) {
+    // Widen before the increment: with a 32-bit ps1 (assumed from the width
+    // argument — confirm in P_X_LOOP) and sa == 1, "+ 1" at lane width
+    // overflows for ps1 == INT32_MAX.
+    pd = (((int64_t)ps1 >> (sa - 1)) + 1) >> 1;
+  } else {
+    pd = ps1;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/sra8.h b/riscv/insns/sra8.h
new file mode 100644
index 0000000..c92e497
--- /dev/null
+++ b/riscv/insns/sra8.h
@@ -0,0 +1,3 @@
+P_X_LOOP(8, 3, { // sra8: body handed to P_X_LOOP (arguments 8 and 3); ps1, sa and pd are supplied by the macro
+ pd = ps1 >> sa; // right shift of the lane by sa; arithmetic if ps1 is signed (type set by the macro — confirm)
+}) \ No newline at end of file
diff --git a/riscv/insns/sra8_u.h b/riscv/insns/sra8_u.h
new file mode 100644
index 0000000..e53736a
--- /dev/null
+++ b/riscv/insns/sra8_u.h
@@ -0,0 +1,6 @@
+// sra8.u: rounding right shift of each 8-bit lane by sa. The lane is shifted
+// by sa - 1, incremented, then shifted once more; sa == 0 copies the lane
+// unchanged.
+P_X_LOOP(8, 3, {
+  if (sa > 0) {
+    const auto with_guard_bit = ps1 >> (sa - 1);
+    pd = (with_guard_bit + 1) >> 1;
+  } else {
+    pd = ps1;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/sra_u.h b/riscv/insns/sra_u.h
new file mode 100644
index 0000000..8efb031
--- /dev/null
+++ b/riscv/insns/sra_u.h
@@ -0,0 +1,9 @@
+// sra.u: rounding arithmetic right shift of RS1 (sign-extended to xlen) by a
+// shift amount taken from RS2 through make_mask64(0, 5) on 32-bit xlen or
+// make_mask64(0, 6) otherwise. A zero shift amount writes RS1 back unchanged.
+require_extension('P');
+sreg_t rs1 = sext_xlen(RS1);
+reg_t sa = get_field(RS2, make_mask64(0, xlen == 32 ? 5 : 6));
+
+if (sa > 0) {
+  // Same value as ((rs1 >> (sa - 1)) + 1) >> 1, written as "truncated shift
+  // plus the last shifted-out bit" so the increment cannot overflow sreg_t
+  // when sa == 1 and rs1 is the largest positive value.
+  sreg_t rounded = (rs1 >> sa) + ((rs1 >> (sa - 1)) & 1);
+  WRITE_RD(sext_xlen(rounded));
+} else {
+  WRITE_RD(sext_xlen(rs1));
+} \ No newline at end of file
diff --git a/riscv/insns/srai16.h b/riscv/insns/srai16.h
new file mode 100644
index 0000000..57fbdc3
--- /dev/null
+++ b/riscv/insns/srai16.h
@@ -0,0 +1,3 @@
+P_I_LOOP(16, 4, { // srai16: body handed to P_I_LOOP (arguments 16 and 4); ps1, imm4u and pd are supplied by the macro
+ pd = ps1 >> imm4u; // right shift of the lane by the immediate; arithmetic if ps1 is signed (type set by the macro — confirm)
+}) \ No newline at end of file
diff --git a/riscv/insns/srai16_u.h b/riscv/insns/srai16_u.h
new file mode 100644
index 0000000..b34bb00
--- /dev/null
+++ b/riscv/insns/srai16_u.h
@@ -0,0 +1,6 @@
+// srai16.u: rounding right shift of each 16-bit lane by imm4u. The lane is
+// shifted by imm4u - 1, incremented, then shifted once more; a zero
+// immediate copies the lane unchanged.
+P_I_LOOP(16, 4, {
+  if (imm4u > 0) {
+    const auto with_guard_bit = ps1 >> (imm4u - 1);
+    pd = (with_guard_bit + 1) >> 1;
+  } else {
+    pd = ps1;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/srai32.h b/riscv/insns/srai32.h
new file mode 100644
index 0000000..85172cf
--- /dev/null
+++ b/riscv/insns/srai32.h
@@ -0,0 +1,4 @@
+require_rv64; // srai32: RV64 guard
+P_I_LOOP(32, 5, { // body handed to P_I_LOOP (arguments 32 and 5); ps1, imm5u and pd are supplied by the macro
+ pd = ps1 >> imm5u; // right shift of the lane by the immediate; arithmetic if ps1 is signed (type set by the macro — confirm)
+}) \ No newline at end of file
diff --git a/riscv/insns/srai32_u.h b/riscv/insns/srai32_u.h
new file mode 100644
index 0000000..5c7b06c
--- /dev/null
+++ b/riscv/insns/srai32_u.h
@@ -0,0 +1,7 @@
+// srai32.u: RV64-only. Rounding right shift of each 32-bit lane by imm5u.
+// The lane is shifted by imm5u - 1, incremented, then shifted once more; a
+// zero immediate copies the lane unchanged.
+require_rv64;
+P_I_LOOP(32, 5, {
+  if (imm5u > 0) {
+    // Widen before the increment: with a 32-bit ps1 (assumed from the width
+    // argument — confirm in P_I_LOOP) and imm5u == 1, "+ 1" at lane width
+    // overflows for ps1 == INT32_MAX.
+    pd = (((int64_t)ps1 >> (imm5u - 1)) + 1) >> 1;
+  } else {
+    pd = ps1;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/srai8.h b/riscv/insns/srai8.h
new file mode 100644
index 0000000..d6f003d
--- /dev/null
+++ b/riscv/insns/srai8.h
@@ -0,0 +1,3 @@
+P_I_LOOP(8, 3, { // srai8: body handed to P_I_LOOP (arguments 8 and 3); ps1, imm3u and pd are supplied by the macro
+ pd = ps1 >> imm3u; // right shift of the lane by the immediate; arithmetic if ps1 is signed (type set by the macro — confirm)
+}) \ No newline at end of file
diff --git a/riscv/insns/srai8_u.h b/riscv/insns/srai8_u.h
new file mode 100644
index 0000000..c2970ed
--- /dev/null
+++ b/riscv/insns/srai8_u.h
@@ -0,0 +1,6 @@
+// srai8.u: rounding right shift of each 8-bit lane by imm3u. The lane is
+// shifted by imm3u - 1, incremented, then shifted once more; a zero
+// immediate copies the lane unchanged.
+P_I_LOOP(8, 3, {
+  if (imm3u > 0) {
+    const auto with_guard_bit = ps1 >> (imm3u - 1);
+    pd = (with_guard_bit + 1) >> 1;
+  } else {
+    pd = ps1;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/srai_u.h b/riscv/insns/srai_u.h
new file mode 100644
index 0000000..0655c6c
--- /dev/null
+++ b/riscv/insns/srai_u.h
@@ -0,0 +1,9 @@
+// srai.u: rounding arithmetic right shift of RS1 (sign-extended to xlen) by
+// an immediate: insn.p_imm5() on 32-bit xlen, insn.p_imm6() otherwise. A zero
+// immediate writes RS1 back unchanged.
+require_extension('P');
+sreg_t rs1 = sext_xlen(RS1);
+reg_t sa = xlen == 32 ? insn.p_imm5() : insn.p_imm6();
+
+if (sa > 0) {
+  // Same value as ((rs1 >> (sa - 1)) + 1) >> 1, written as "truncated shift
+  // plus the last shifted-out bit" so the increment cannot overflow sreg_t
+  // when sa == 1 and rs1 is the largest positive value.
+  sreg_t rounded = (rs1 >> sa) + ((rs1 >> (sa - 1)) & 1);
+  WRITE_RD(sext_xlen(rounded));
+} else {
+  WRITE_RD(sext_xlen(rs1));
+} \ No newline at end of file
diff --git a/riscv/insns/sraiw_u.h b/riscv/insns/sraiw_u.h
new file mode 100644
index 0000000..dd26707
--- /dev/null
+++ b/riscv/insns/sraiw_u.h
@@ -0,0 +1,9 @@
+// sraiw.u: RV64-only. Rounding arithmetic right shift of word 0 of RS1 (read
+// via P_SW) by insn.p_imm5(); the result is written through sext32. A zero
+// immediate writes the word back unchanged.
+require_rv64;
+require_extension('P');
+
+reg_t sa = insn.p_imm5();
+if (sa != 0) {
+  // Widen to sreg_t before the increment: at 32-bit width, "+ 1" overflows
+  // when sa == 1 and the word is INT32_MAX (assuming P_SW yields a 32-bit
+  // signed value — confirm).
+  WRITE_RD(sext32((((sreg_t)P_SW(RS1, 0) >> (sa - 1)) + 1) >> 1));
+} else {
+  WRITE_RD(sext32(P_SW(RS1, 0)));
+} \ No newline at end of file
diff --git a/riscv/insns/srl16.h b/riscv/insns/srl16.h
new file mode 100644
index 0000000..dede8e3
--- /dev/null
+++ b/riscv/insns/srl16.h
@@ -0,0 +1,3 @@
+P_X_ULOOP(16, 4, { // srl16: body handed to P_X_ULOOP (arguments 16 and 4); ps1, sa and pd are supplied by the macro
+ pd = ps1 >> sa; // right shift of the lane by sa; logical if ps1 is unsigned (type set by the macro — confirm)
+}) \ No newline at end of file
diff --git a/riscv/insns/srl16_u.h b/riscv/insns/srl16_u.h
new file mode 100644
index 0000000..f7b0571
--- /dev/null
+++ b/riscv/insns/srl16_u.h
@@ -0,0 +1,7 @@
+// srl16.u: rounding right shift of each 16-bit lane by sa in the unsigned
+// loop. The lane is shifted by sa - 1, incremented, then shifted once more;
+// sa == 0 copies the lane unchanged.
+P_X_ULOOP(16, 4, {
+  if (sa > 0) {
+    const auto with_guard_bit = ps1 >> (sa - 1);
+    pd = (with_guard_bit + 1) >> 1;
+  } else {
+    pd = ps1;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/srl32.h b/riscv/insns/srl32.h
new file mode 100644
index 0000000..c50522f
--- /dev/null
+++ b/riscv/insns/srl32.h
@@ -0,0 +1,4 @@
+require_rv64; // srl32: RV64 guard
+P_X_ULOOP(32, 5, { // body handed to P_X_ULOOP (arguments 32 and 5); ps1, sa and pd are supplied by the macro
+ pd = ps1 >> sa; // right shift of the lane by sa; logical if ps1 is unsigned (type set by the macro — confirm)
+}) \ No newline at end of file
diff --git a/riscv/insns/srl32_u.h b/riscv/insns/srl32_u.h
new file mode 100644
index 0000000..3c770d5
--- /dev/null
+++ b/riscv/insns/srl32_u.h
@@ -0,0 +1,8 @@
+// srl32.u: RV64-only. Rounding right shift of each 32-bit lane by sa in the
+// unsigned loop. The lane is shifted by sa - 1, incremented, then shifted
+// once more; sa == 0 copies the lane unchanged.
+require_rv64;
+P_X_ULOOP(32, 5, {
+  if (sa > 0) {
+    // Widen before the increment: with a 32-bit unsigned ps1 (assumed from
+    // the width argument — confirm in P_X_ULOOP) and sa == 1, "+ 1" at lane
+    // width wraps 0xFFFFFFFF to 0 and the rounded result is lost.
+    pd = (((uint64_t)ps1 >> (sa - 1)) + 1) >> 1;
+  } else {
+    pd = ps1;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/srl8.h b/riscv/insns/srl8.h
new file mode 100644
index 0000000..f5891fa
--- /dev/null
+++ b/riscv/insns/srl8.h
@@ -0,0 +1,3 @@
+P_X_ULOOP(8, 3, { // srl8: body handed to P_X_ULOOP (arguments 8 and 3); ps1, sa and pd are supplied by the macro
+ pd = ps1 >> sa; // right shift of the lane by sa; logical if ps1 is unsigned (type set by the macro — confirm)
+}) \ No newline at end of file
diff --git a/riscv/insns/srl8_u.h b/riscv/insns/srl8_u.h
new file mode 100644
index 0000000..956d260
--- /dev/null
+++ b/riscv/insns/srl8_u.h
@@ -0,0 +1,7 @@
+// srl8.u: rounding right shift of each 8-bit lane by sa in the unsigned
+// loop. The lane is shifted by sa - 1, incremented, then shifted once more;
+// sa == 0 copies the lane unchanged.
+P_X_ULOOP(8, 3, {
+  if (sa > 0) {
+    const auto with_guard_bit = ps1 >> (sa - 1);
+    pd = (with_guard_bit + 1) >> 1;
+  } else {
+    pd = ps1;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/srli16.h b/riscv/insns/srli16.h
new file mode 100644
index 0000000..0ad3cb4
--- /dev/null
+++ b/riscv/insns/srli16.h
@@ -0,0 +1,3 @@
+P_I_ULOOP(16, 4, { // srli16: body handed to P_I_ULOOP (arguments 16 and 4); ps1, imm4u and pd are supplied by the macro
+ pd = ps1 >> imm4u; // right shift of the lane by the immediate; logical if ps1 is unsigned (type set by the macro — confirm)
+}) \ No newline at end of file
diff --git a/riscv/insns/srli16_u.h b/riscv/insns/srli16_u.h
new file mode 100644
index 0000000..4eb9248
--- /dev/null
+++ b/riscv/insns/srli16_u.h
@@ -0,0 +1,7 @@
+// srli16.u: rounding right shift of each 16-bit lane by imm4u in the
+// unsigned loop. The lane is shifted by imm4u - 1, incremented, then shifted
+// once more; a zero immediate copies the lane unchanged.
+P_I_ULOOP(16, 4, {
+  if (imm4u > 0) {
+    const auto with_guard_bit = ps1 >> (imm4u - 1);
+    pd = (with_guard_bit + 1) >> 1;
+  } else {
+    pd = ps1;
+  }
+}) \ No newline at end of file
diff --git a/riscv/insns/srli32.h b/riscv/insns/srli32.h
new file mode 100644
index 0000000..f5d229c
--- /dev/null
+++ b/riscv/insns/srli32.h
@@ -0,0 +1,4 @@
+require_rv64;
+P_I_ULOOP(32, 5, {
+ pd = ps1 >> imm5u;
+}) \ No newline at end of file
diff --git a/riscv/insns/srli32_u.h b/riscv/insns/srli32_u.h
new file mode 100644
index 0000000..8aa9c1b
--- /dev/null
+++ b/riscv/insns/srli32_u.h
@@ -0,0 +1,8 @@
+require_rv64;
+P_I_ULOOP(32, 5, {
+ if (imm5u > 0) {
+ pd = ((ps1 >> (imm5u - 1)) + 1) >> 1;
+ } else {
+ pd = ps1;
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/srli8.h b/riscv/insns/srli8.h
new file mode 100644
index 0000000..bf64250
--- /dev/null
+++ b/riscv/insns/srli8.h
@@ -0,0 +1,3 @@
+P_I_ULOOP(8, 3, {
+ pd = ps1 >> imm3u;
+}) \ No newline at end of file
diff --git a/riscv/insns/srli8_u.h b/riscv/insns/srli8_u.h
new file mode 100644
index 0000000..1154177
--- /dev/null
+++ b/riscv/insns/srli8_u.h
@@ -0,0 +1,7 @@
+P_I_ULOOP(8, 3, {
+ if (imm3u > 0) {
+ pd = ((ps1 >> (imm3u - 1)) + 1) >> 1;
+ } else {
+ pd = ps1;
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/stas16.h b/riscv/insns/stas16.h
new file mode 100644
index 0000000..7cea5ae
--- /dev/null
+++ b/riscv/insns/stas16.h
@@ -0,0 +1,5 @@
+P_STRAIGHT_LOOP(16, {
+ pd = ps1 + ps2;
+}, {
+ pd = ps1 - ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/stas32.h b/riscv/insns/stas32.h
new file mode 100644
index 0000000..fad20f0
--- /dev/null
+++ b/riscv/insns/stas32.h
@@ -0,0 +1,6 @@
+require_rv64;
+P_STRAIGHT_LOOP(32, {
+ pd = ps1 + ps2;
+}, {
+ pd = ps1 - ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/stsa16.h b/riscv/insns/stsa16.h
new file mode 100644
index 0000000..9ca05ab
--- /dev/null
+++ b/riscv/insns/stsa16.h
@@ -0,0 +1,5 @@
+P_STRAIGHT_LOOP(16, {
+ pd = ps1 - ps2;
+}, {
+ pd = ps1 + ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/stsa32.h b/riscv/insns/stsa32.h
new file mode 100644
index 0000000..b6be39a
--- /dev/null
+++ b/riscv/insns/stsa32.h
@@ -0,0 +1,6 @@
+require_rv64;
+P_STRAIGHT_LOOP(32, {
+ pd = ps1 - ps2;
+}, {
+ pd = ps1 + ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/sub16.h b/riscv/insns/sub16.h
new file mode 100644
index 0000000..1bcca23
--- /dev/null
+++ b/riscv/insns/sub16.h
@@ -0,0 +1,3 @@
+P_LOOP(16, {
+ pd = ps1 - ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/sub32.h b/riscv/insns/sub32.h
new file mode 100644
index 0000000..06e645c
--- /dev/null
+++ b/riscv/insns/sub32.h
@@ -0,0 +1,4 @@
+require_rv64;
+P_LOOP(32, {
+ pd = ps1 - ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/sub64.h b/riscv/insns/sub64.h
new file mode 100644
index 0000000..0b747fe
--- /dev/null
+++ b/riscv/insns/sub64.h
@@ -0,0 +1,3 @@
+P_64_PROFILE({
+ rd = rs1 - rs2;
+}) \ No newline at end of file
diff --git a/riscv/insns/sub8.h b/riscv/insns/sub8.h
new file mode 100644
index 0000000..8cdca73
--- /dev/null
+++ b/riscv/insns/sub8.h
@@ -0,0 +1,3 @@
+P_LOOP(8, {
+ pd = ps1 - ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/sunpkd810.h b/riscv/insns/sunpkd810.h
new file mode 100644
index 0000000..b93a981
--- /dev/null
+++ b/riscv/insns/sunpkd810.h
@@ -0,0 +1 @@
+P_SUNPKD8(1, 0) \ No newline at end of file
diff --git a/riscv/insns/sunpkd820.h b/riscv/insns/sunpkd820.h
new file mode 100644
index 0000000..5ded8f6
--- /dev/null
+++ b/riscv/insns/sunpkd820.h
@@ -0,0 +1 @@
+P_SUNPKD8(2, 0) \ No newline at end of file
diff --git a/riscv/insns/sunpkd830.h b/riscv/insns/sunpkd830.h
new file mode 100644
index 0000000..84688c4
--- /dev/null
+++ b/riscv/insns/sunpkd830.h
@@ -0,0 +1 @@
+P_SUNPKD8(3, 0) \ No newline at end of file
diff --git a/riscv/insns/sunpkd831.h b/riscv/insns/sunpkd831.h
new file mode 100644
index 0000000..bf19ef0
--- /dev/null
+++ b/riscv/insns/sunpkd831.h
@@ -0,0 +1 @@
+P_SUNPKD8(3, 1) \ No newline at end of file
diff --git a/riscv/insns/sunpkd832.h b/riscv/insns/sunpkd832.h
new file mode 100644
index 0000000..ae2ce0d
--- /dev/null
+++ b/riscv/insns/sunpkd832.h
@@ -0,0 +1 @@
+P_SUNPKD8(3, 2) \ No newline at end of file
diff --git a/riscv/insns/swap16.h b/riscv/insns/swap16.h
new file mode 100644
index 0000000..2b9f13d
--- /dev/null
+++ b/riscv/insns/swap16.h
@@ -0,0 +1,4 @@
+P_ONE_LOOP_BASE(16)
+auto pd = P_FIELD(rs1, (i ^ 1), 16);
+WRITE_PD();
+P_LOOP_END() \ No newline at end of file
diff --git a/riscv/insns/swap8.h b/riscv/insns/swap8.h
new file mode 100644
index 0000000..d57304d
--- /dev/null
+++ b/riscv/insns/swap8.h
@@ -0,0 +1,4 @@
+P_ONE_LOOP_BASE(8)
+auto pd = P_FIELD(rs1, (i ^ 1), 8);
+WRITE_PD();
+P_LOOP_END() \ No newline at end of file
diff --git a/riscv/insns/uclip16.h b/riscv/insns/uclip16.h
new file mode 100644
index 0000000..faf57ca
--- /dev/null
+++ b/riscv/insns/uclip16.h
@@ -0,0 +1,12 @@
+P_I_LOOP(16, 4, {
+ int64_t uint_max = imm4u ? UINT64_MAX >> (64 - imm4u) : 0;
+ pd = ps1;
+
+ if (ps1 > uint_max) {
+ pd = uint_max;
+ P_SET_OV(1);
+ } else if (ps1 < 0) {
+ pd = 0;
+ P_SET_OV(1);
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/uclip32.h b/riscv/insns/uclip32.h
new file mode 100644
index 0000000..f5d89ad
--- /dev/null
+++ b/riscv/insns/uclip32.h
@@ -0,0 +1,12 @@
+P_I_LOOP(32, 5, {
+ int64_t uint_max = imm5u ? UINT64_MAX >> (64 - imm5u) : 0;
+ pd = ps1;
+
+ if (ps1 > uint_max) {
+ pd = uint_max;
+ P_SET_OV(1);
+ } else if (ps1 < 0) {
+ pd = 0;
+ P_SET_OV(1);
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/uclip8.h b/riscv/insns/uclip8.h
new file mode 100644
index 0000000..e2978a6
--- /dev/null
+++ b/riscv/insns/uclip8.h
@@ -0,0 +1,12 @@
+P_I_LOOP(8, 3, {
+ int64_t uint_max = imm3u ? UINT64_MAX >> (64 - imm3u) : 0;
+ pd = ps1;
+
+ if (ps1 > uint_max) {
+ pd = uint_max;
+ P_SET_OV(1);
+ } else if (ps1 < 0) {
+ pd = 0;
+ P_SET_OV(1);
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/ucmple16.h b/riscv/insns/ucmple16.h
new file mode 100644
index 0000000..b3ba04d
--- /dev/null
+++ b/riscv/insns/ucmple16.h
@@ -0,0 +1,3 @@
+P_ULOOP(16, {
+ pd = (ps1 <= ps2) ? -1 : 0;
+}) \ No newline at end of file
diff --git a/riscv/insns/ucmple8.h b/riscv/insns/ucmple8.h
new file mode 100644
index 0000000..78f12b8
--- /dev/null
+++ b/riscv/insns/ucmple8.h
@@ -0,0 +1,3 @@
+P_ULOOP(8, {
+ pd = (ps1 <= ps2) ? -1 : 0;
+}) \ No newline at end of file
diff --git a/riscv/insns/ucmplt16.h b/riscv/insns/ucmplt16.h
new file mode 100644
index 0000000..fca78a8
--- /dev/null
+++ b/riscv/insns/ucmplt16.h
@@ -0,0 +1,3 @@
+P_ULOOP(16, {
+ pd = (ps1 < ps2) ? -1 : 0;
+}) \ No newline at end of file
diff --git a/riscv/insns/ucmplt8.h b/riscv/insns/ucmplt8.h
new file mode 100644
index 0000000..0ebdf50
--- /dev/null
+++ b/riscv/insns/ucmplt8.h
@@ -0,0 +1,3 @@
+P_ULOOP(8, {
+ pd = (ps1 < ps2) ? -1 : 0;
+}) \ No newline at end of file
diff --git a/riscv/insns/ukadd16.h b/riscv/insns/ukadd16.h
new file mode 100644
index 0000000..57c661a
--- /dev/null
+++ b/riscv/insns/ukadd16.h
@@ -0,0 +1,5 @@
+P_ULOOP(16, {
+ bool sat = false;
+ pd = (sat_addu<uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ukadd32.h b/riscv/insns/ukadd32.h
new file mode 100644
index 0000000..7613e59
--- /dev/null
+++ b/riscv/insns/ukadd32.h
@@ -0,0 +1,6 @@
+require_rv64;
+P_ULOOP(32, {
+ bool sat = false;
+ pd = (sat_addu<uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ukadd64.h b/riscv/insns/ukadd64.h
new file mode 100644
index 0000000..047e276
--- /dev/null
+++ b/riscv/insns/ukadd64.h
@@ -0,0 +1,5 @@
+P_64_UPROFILE({
+ bool sat = false;
+ rd = (sat_addu<uint64_t>(rs1, rs2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ukadd8.h b/riscv/insns/ukadd8.h
new file mode 100644
index 0000000..68708e2
--- /dev/null
+++ b/riscv/insns/ukadd8.h
@@ -0,0 +1,5 @@
+P_ULOOP(8, {
+ bool sat = false;
+ pd = (sat_addu<uint8_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ukaddh.h b/riscv/insns/ukaddh.h
new file mode 100644
index 0000000..4eed73f
--- /dev/null
+++ b/riscv/insns/ukaddh.h
@@ -0,0 +1,4 @@
+require_extension('P');
+sreg_t res = (sreg_t)P_W(RS1, 0) + (sreg_t)P_W(RS2, 0);
+P_SATU(res, 16);
+WRITE_RD(sext_xlen((int16_t)res)); \ No newline at end of file
diff --git a/riscv/insns/ukaddw.h b/riscv/insns/ukaddw.h
new file mode 100644
index 0000000..61c109f
--- /dev/null
+++ b/riscv/insns/ukaddw.h
@@ -0,0 +1,4 @@
+require_extension('P');
+sreg_t res = (sreg_t)P_W(RS1, 0) + (sreg_t)P_W(RS2, 0);
+P_SATU(res, 32);
+WRITE_RD(sext32(res)); \ No newline at end of file
diff --git a/riscv/insns/ukcras16.h b/riscv/insns/ukcras16.h
new file mode 100644
index 0000000..90ef8f8
--- /dev/null
+++ b/riscv/insns/ukcras16.h
@@ -0,0 +1,9 @@
+P_CROSS_ULOOP(16, {
+ bool sat = false;
+ pd = (sat_addu<uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}, {
+ bool sat = false;
+ pd = (sat_subu<uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+})
diff --git a/riscv/insns/ukcras32.h b/riscv/insns/ukcras32.h
new file mode 100644
index 0000000..9478778
--- /dev/null
+++ b/riscv/insns/ukcras32.h
@@ -0,0 +1,10 @@
+require_rv64;
+P_CROSS_ULOOP(32, {
+ bool sat = false;
+ pd = (sat_addu<uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}, {
+ bool sat = false;
+ pd = (sat_subu<uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+})
diff --git a/riscv/insns/ukcrsa16.h b/riscv/insns/ukcrsa16.h
new file mode 100644
index 0000000..91c505d
--- /dev/null
+++ b/riscv/insns/ukcrsa16.h
@@ -0,0 +1,9 @@
+P_CROSS_ULOOP(16, {
+ bool sat = false;
+ pd = (sat_subu<uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}, {
+ bool sat = false;
+ pd = (sat_addu<uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ukcrsa32.h b/riscv/insns/ukcrsa32.h
new file mode 100644
index 0000000..fd6a455
--- /dev/null
+++ b/riscv/insns/ukcrsa32.h
@@ -0,0 +1,10 @@
+require_rv64;
+P_CROSS_ULOOP(32, {
+ bool sat = false;
+ pd = (sat_subu<uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}, {
+ bool sat = false;
+ pd = (sat_addu<uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ukmar64.h b/riscv/insns/ukmar64.h
new file mode 100644
index 0000000..e12be5d
--- /dev/null
+++ b/riscv/insns/ukmar64.h
@@ -0,0 +1,5 @@
+P_64_UPROFILE_REDUCTION(32, {
+ bool sat = false;
+ rd = (sat_addu<uint64_t>(rd, ps1 * ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ukmsr64.h b/riscv/insns/ukmsr64.h
new file mode 100644
index 0000000..d2e0ac3
--- /dev/null
+++ b/riscv/insns/ukmsr64.h
@@ -0,0 +1,5 @@
+P_64_UPROFILE_REDUCTION(32, {
+ bool sat = false;
+ rd = (sat_subu<uint64_t>(rd, ps1 * ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ukstas16.h b/riscv/insns/ukstas16.h
new file mode 100644
index 0000000..110b247
--- /dev/null
+++ b/riscv/insns/ukstas16.h
@@ -0,0 +1,9 @@
+P_STRAIGHT_ULOOP(16, {
+ bool sat = false;
+ pd = (sat_addu<uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}, {
+ bool sat = false;
+ pd = (sat_subu<uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ukstas32.h b/riscv/insns/ukstas32.h
new file mode 100644
index 0000000..62a39eb
--- /dev/null
+++ b/riscv/insns/ukstas32.h
@@ -0,0 +1,10 @@
+require_rv64;
+P_STRAIGHT_ULOOP(32, {
+ bool sat = false;
+ pd = (sat_addu<uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}, {
+ bool sat = false;
+ pd = (sat_subu<uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ukstsa16.h b/riscv/insns/ukstsa16.h
new file mode 100644
index 0000000..47a2fc1
--- /dev/null
+++ b/riscv/insns/ukstsa16.h
@@ -0,0 +1,9 @@
+P_STRAIGHT_ULOOP(16, {
+ bool sat = false;
+ pd = (sat_subu<uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}, {
+ bool sat = false;
+ pd = (sat_addu<uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/ukstsa32.h b/riscv/insns/ukstsa32.h
new file mode 100644
index 0000000..52a078f
--- /dev/null
+++ b/riscv/insns/ukstsa32.h
@@ -0,0 +1,10 @@
+require_rv64;
+P_STRAIGHT_ULOOP(32, {
+ bool sat = false;
+ pd = (sat_subu<uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}, {
+ bool sat = false;
+ pd = (sat_addu<uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/uksub16.h b/riscv/insns/uksub16.h
new file mode 100644
index 0000000..b330622
--- /dev/null
+++ b/riscv/insns/uksub16.h
@@ -0,0 +1,5 @@
+P_ULOOP(16, {
+ bool sat = false;
+ pd = (sat_subu<uint16_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/uksub32.h b/riscv/insns/uksub32.h
new file mode 100644
index 0000000..da1fd20
--- /dev/null
+++ b/riscv/insns/uksub32.h
@@ -0,0 +1,6 @@
+require_rv64;
+P_ULOOP(32, {
+ bool sat = false;
+ pd = (sat_subu<uint32_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/uksub64.h b/riscv/insns/uksub64.h
new file mode 100644
index 0000000..7a0deb1
--- /dev/null
+++ b/riscv/insns/uksub64.h
@@ -0,0 +1,5 @@
+P_64_UPROFILE({
+ bool sat = false;
+ rd = (sat_subu<uint64_t>(rs1, rs2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/uksub8.h b/riscv/insns/uksub8.h
new file mode 100644
index 0000000..3d47608
--- /dev/null
+++ b/riscv/insns/uksub8.h
@@ -0,0 +1,5 @@
+P_ULOOP(8, {
+ bool sat = false;
+ pd = (sat_subu<uint8_t>(ps1, ps2, sat));
+ P_SET_OV(sat);
+}) \ No newline at end of file
diff --git a/riscv/insns/uksubh.h b/riscv/insns/uksubh.h
new file mode 100644
index 0000000..be9ec45
--- /dev/null
+++ b/riscv/insns/uksubh.h
@@ -0,0 +1,4 @@
+require_extension('P');
+sreg_t res = (sreg_t)P_W(RS1, 0) - (sreg_t)P_W(RS2, 0);
+P_SATU(res, 16);
+WRITE_RD(sext_xlen((int16_t)res)); \ No newline at end of file
diff --git a/riscv/insns/uksubw.h b/riscv/insns/uksubw.h
new file mode 100644
index 0000000..8a088cd
--- /dev/null
+++ b/riscv/insns/uksubw.h
@@ -0,0 +1,4 @@
+require_extension('P');
+sreg_t res = (sreg_t)P_W(RS1, 0) - (sreg_t)P_W(RS2, 0);
+P_SATU(res, 32);
+WRITE_RD(sext32(res)); \ No newline at end of file
diff --git a/riscv/insns/umaqa.h b/riscv/insns/umaqa.h
new file mode 100644
index 0000000..69cd048
--- /dev/null
+++ b/riscv/insns/umaqa.h
@@ -0,0 +1,3 @@
+P_REDUCTION_ULOOP(32, 8, true, false, {
+ pd_res += ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/umar64.h b/riscv/insns/umar64.h
new file mode 100644
index 0000000..a131104
--- /dev/null
+++ b/riscv/insns/umar64.h
@@ -0,0 +1,3 @@
+P_64_UPROFILE_REDUCTION(32, {
+ rd += ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/umax16.h b/riscv/insns/umax16.h
new file mode 100644
index 0000000..8cc897b
--- /dev/null
+++ b/riscv/insns/umax16.h
@@ -0,0 +1,3 @@
+P_ULOOP(16, {
+ pd = (ps1 > ps2) ? ps1 : ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/umax32.h b/riscv/insns/umax32.h
new file mode 100644
index 0000000..77a57f5
--- /dev/null
+++ b/riscv/insns/umax32.h
@@ -0,0 +1,5 @@
+// RV64-only (listed in riscv_insn_ext_p_rv64_only): pd takes the larger of ps1/ps2 in each 32-bit element.
+require_rv64;
+P_ULOOP(32, {
+ pd = (ps1 > ps2) ? ps1 : ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/umax8.h b/riscv/insns/umax8.h
new file mode 100644
index 0000000..22a89f0
--- /dev/null
+++ b/riscv/insns/umax8.h
@@ -0,0 +1,3 @@
+P_ULOOP(8, {
+ pd = (ps1 > ps2) ? ps1 : ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/umin16.h b/riscv/insns/umin16.h
new file mode 100644
index 0000000..8e7b6a4
--- /dev/null
+++ b/riscv/insns/umin16.h
@@ -0,0 +1,3 @@
+P_ULOOP(16, {
+ pd = (ps1 < ps2) ? ps1 : ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/umin32.h b/riscv/insns/umin32.h
new file mode 100644
index 0000000..6b5a6dc
--- /dev/null
+++ b/riscv/insns/umin32.h
@@ -0,0 +1,5 @@
+// RV64-only (listed in riscv_insn_ext_p_rv64_only): pd takes the smaller of ps1/ps2 in each 32-bit element.
+require_rv64;
+P_ULOOP(32, {
+ pd = (ps1 < ps2) ? ps1 : ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/umin8.h b/riscv/insns/umin8.h
new file mode 100644
index 0000000..9194fd7
--- /dev/null
+++ b/riscv/insns/umin8.h
@@ -0,0 +1,3 @@
+P_ULOOP(8, {
+ pd = (ps1 < ps2) ? ps1 : ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/umsr64.h b/riscv/insns/umsr64.h
new file mode 100644
index 0000000..1b28267
--- /dev/null
+++ b/riscv/insns/umsr64.h
@@ -0,0 +1,3 @@
+P_64_UPROFILE_REDUCTION(32, {
+ rd -= ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/umul16.h b/riscv/insns/umul16.h
new file mode 100644
index 0000000..1f5891d
--- /dev/null
+++ b/riscv/insns/umul16.h
@@ -0,0 +1,3 @@
+P_MUL_ULOOP(16, {
+ pd = ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/umul8.h b/riscv/insns/umul8.h
new file mode 100644
index 0000000..d96ab73
--- /dev/null
+++ b/riscv/insns/umul8.h
@@ -0,0 +1,3 @@
+P_MUL_ULOOP(8, {
+ pd = ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/umulx16.h b/riscv/insns/umulx16.h
new file mode 100644
index 0000000..e3dd5a3
--- /dev/null
+++ b/riscv/insns/umulx16.h
@@ -0,0 +1,3 @@
+P_MUL_CROSS_ULOOP(16, {
+ pd = ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/umulx8.h b/riscv/insns/umulx8.h
new file mode 100644
index 0000000..48062da
--- /dev/null
+++ b/riscv/insns/umulx8.h
@@ -0,0 +1,3 @@
+P_MUL_CROSS_ULOOP(8, {
+ pd = ps1 * ps2;
+}) \ No newline at end of file
diff --git a/riscv/insns/uradd16.h b/riscv/insns/uradd16.h
new file mode 100644
index 0000000..2349568
--- /dev/null
+++ b/riscv/insns/uradd16.h
@@ -0,0 +1,3 @@
+P_ULOOP(16, {
+ pd = (ps1 + ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/uradd32.h b/riscv/insns/uradd32.h
new file mode 100644
index 0000000..9b17e7c
--- /dev/null
+++ b/riscv/insns/uradd32.h
@@ -0,0 +1,4 @@
+require_rv64;
+P_ULOOP(32, {
+ pd = ((uint64_t)ps1 + ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/uradd64.h b/riscv/insns/uradd64.h
new file mode 100644
index 0000000..3005c98
--- /dev/null
+++ b/riscv/insns/uradd64.h
@@ -0,0 +1,9 @@
+P_64_UPROFILE({
+ rd = rs1 + rs2;
+ if (rd < rs1) {
+ rd >>= 1;
+ rd |= ((reg_t)1 << 63);
+ } else {
+ rd >>= 1;
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/uradd8.h b/riscv/insns/uradd8.h
new file mode 100644
index 0000000..277d2d3
--- /dev/null
+++ b/riscv/insns/uradd8.h
@@ -0,0 +1,3 @@
+P_ULOOP(8, {
+ pd = (ps1 + ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/uraddw.h b/riscv/insns/uraddw.h
new file mode 100644
index 0000000..97c5710
--- /dev/null
+++ b/riscv/insns/uraddw.h
@@ -0,0 +1,6 @@
+// Halving add of word 0 of RS1 and RS2: the sum is shifted right by one and its low 32 bits are written back sign-extended.
+// Operands go through uint32_t so they are zero-extended whatever type P_W yields; a sign-extended operand would corrupt bit 32 of the sum (bit 31 of the result).
+require_extension('P');
+reg_t res = (reg_t)(uint32_t)P_W(RS1, 0) + (reg_t)(uint32_t)P_W(RS2, 0);
+res >>= 1;
+WRITE_RD(sext_xlen((int32_t)res)); \ No newline at end of file
diff --git a/riscv/insns/urcras16.h b/riscv/insns/urcras16.h
new file mode 100644
index 0000000..592920d
--- /dev/null
+++ b/riscv/insns/urcras16.h
@@ -0,0 +1,5 @@
+P_CROSS_ULOOP(16, {
+ pd = (ps1 + ps2) >> 1;
+}, {
+ pd = (ps1 - ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/urcras32.h b/riscv/insns/urcras32.h
new file mode 100644
index 0000000..0e5a201
--- /dev/null
+++ b/riscv/insns/urcras32.h
@@ -0,0 +1,6 @@
+require_rv64;
+P_CROSS_ULOOP(32, {
+ pd = ((uint64_t)ps1 + ps2) >> 1;
+}, {
+ pd = ((uint64_t)ps1 - ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/urcrsa16.h b/riscv/insns/urcrsa16.h
new file mode 100644
index 0000000..65fe17b
--- /dev/null
+++ b/riscv/insns/urcrsa16.h
@@ -0,0 +1,5 @@
+P_CROSS_ULOOP(16, {
+ pd = (ps1 - ps2) >> 1;
+}, {
+ pd = (ps1 + ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/urcrsa32.h b/riscv/insns/urcrsa32.h
new file mode 100644
index 0000000..fda986f
--- /dev/null
+++ b/riscv/insns/urcrsa32.h
@@ -0,0 +1,6 @@
+require_rv64;
+P_CROSS_ULOOP(32, {
+ pd = ((uint64_t)ps1 - ps2) >> 1;
+}, {
+ pd = ((uint64_t)ps1 + ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/urstas16.h b/riscv/insns/urstas16.h
new file mode 100644
index 0000000..9aa4697
--- /dev/null
+++ b/riscv/insns/urstas16.h
@@ -0,0 +1,5 @@
+P_STRAIGHT_ULOOP(16, {
+ pd = (ps1 + ps2) >> 1;
+}, {
+ pd = (ps1 - ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/urstas32.h b/riscv/insns/urstas32.h
new file mode 100644
index 0000000..c79d3ce
--- /dev/null
+++ b/riscv/insns/urstas32.h
@@ -0,0 +1,6 @@
+require_rv64;
+P_STRAIGHT_ULOOP(32, {
+ pd = ((uint64_t)ps1 + ps2) >> 1;
+}, {
+ pd = ((uint64_t)ps1 - ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/urstsa16.h b/riscv/insns/urstsa16.h
new file mode 100644
index 0000000..3aec9c9
--- /dev/null
+++ b/riscv/insns/urstsa16.h
@@ -0,0 +1,5 @@
+P_STRAIGHT_ULOOP(16, {
+ pd = (ps1 - ps2) >> 1;
+}, {
+ pd = (ps1 + ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/urstsa32.h b/riscv/insns/urstsa32.h
new file mode 100644
index 0000000..577fba1
--- /dev/null
+++ b/riscv/insns/urstsa32.h
@@ -0,0 +1,6 @@
+require_rv64;
+P_STRAIGHT_ULOOP(32, {
+ pd = ((uint64_t)ps1 - ps2) >> 1;
+}, {
+ pd = ((uint64_t)ps1 + ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/ursub16.h b/riscv/insns/ursub16.h
new file mode 100644
index 0000000..153d8ec
--- /dev/null
+++ b/riscv/insns/ursub16.h
@@ -0,0 +1,3 @@
+P_ULOOP(16, {
+ pd = (ps1 - ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/ursub32.h b/riscv/insns/ursub32.h
new file mode 100644
index 0000000..d43e065
--- /dev/null
+++ b/riscv/insns/ursub32.h
@@ -0,0 +1,4 @@
+require_rv64;
+P_ULOOP(32, {
+ pd = ((uint64_t)ps1 - ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/ursub64.h b/riscv/insns/ursub64.h
new file mode 100644
index 0000000..6354e3e
--- /dev/null
+++ b/riscv/insns/ursub64.h
@@ -0,0 +1,9 @@
+P_64_UPROFILE({
+ rd = rs1 - rs2;
+ if (rd > rs1) {
+ rd >>= 1;
+ rd |= ((reg_t)1 << 63);
+ } else {
+ rd >>= 1;
+ }
+}) \ No newline at end of file
diff --git a/riscv/insns/ursub8.h b/riscv/insns/ursub8.h
new file mode 100644
index 0000000..74dea5d
--- /dev/null
+++ b/riscv/insns/ursub8.h
@@ -0,0 +1,3 @@
+P_ULOOP(8, {
+ pd = (ps1 - ps2) >> 1;
+}) \ No newline at end of file
diff --git a/riscv/insns/ursubw.h b/riscv/insns/ursubw.h
new file mode 100644
index 0000000..2b27955
--- /dev/null
+++ b/riscv/insns/ursubw.h
@@ -0,0 +1,6 @@
+// Halving subtract of word 0 of RS2 from word 0 of RS1: the difference is shifted right by one and its low 32 bits are written back sign-extended.
+// Operands go through uint32_t so they are zero-extended whatever type P_W yields; a sign-extended operand would corrupt bit 32 of the difference (bit 31 of the result).
+require_extension('P');
+reg_t res = (reg_t)(uint32_t)P_W(RS1, 0) - (reg_t)(uint32_t)P_W(RS2, 0);
+res >>= 1;
+WRITE_RD(sext_xlen((int32_t)res)); \ No newline at end of file
diff --git a/riscv/insns/wext.h b/riscv/insns/wext.h
new file mode 100644
index 0000000..b849011
--- /dev/null
+++ b/riscv/insns/wext.h
@@ -0,0 +1,4 @@
+require_extension('P');
+reg_t rs1 = (xlen == 32) ? RS1_PAIR : RS1;
+reg_t lsb = get_field(RS2, make_mask64(0, 5));
+WRITE_RD(sext32(get_field(rs1, make_mask64(lsb, 32)))); \ No newline at end of file
diff --git a/riscv/insns/wexti.h b/riscv/insns/wexti.h
new file mode 100644
index 0000000..752b173
--- /dev/null
+++ b/riscv/insns/wexti.h
@@ -0,0 +1,4 @@
+require_extension('P');
+reg_t rs1 = (xlen == 32) ? RS1_PAIR : RS1;
+reg_t lsb = insn.p_imm5();
+WRITE_RD(sext32(get_field(rs1, make_mask64(lsb, 32)))); \ No newline at end of file
diff --git a/riscv/insns/zunpkd810.h b/riscv/insns/zunpkd810.h
new file mode 100644
index 0000000..d5cf460
--- /dev/null
+++ b/riscv/insns/zunpkd810.h
@@ -0,0 +1 @@
+P_ZUNPKD8(1, 0) \ No newline at end of file
diff --git a/riscv/insns/zunpkd820.h b/riscv/insns/zunpkd820.h
new file mode 100644
index 0000000..149d1de
--- /dev/null
+++ b/riscv/insns/zunpkd820.h
@@ -0,0 +1 @@
+P_ZUNPKD8(2, 0) \ No newline at end of file
diff --git a/riscv/insns/zunpkd830.h b/riscv/insns/zunpkd830.h
new file mode 100644
index 0000000..aa1f554
--- /dev/null
+++ b/riscv/insns/zunpkd830.h
@@ -0,0 +1 @@
+P_ZUNPKD8(3, 0) \ No newline at end of file
diff --git a/riscv/insns/zunpkd831.h b/riscv/insns/zunpkd831.h
new file mode 100644
index 0000000..494e2f1
--- /dev/null
+++ b/riscv/insns/zunpkd831.h
@@ -0,0 +1 @@
+P_ZUNPKD8(3, 1) \ No newline at end of file
diff --git a/riscv/insns/zunpkd832.h b/riscv/insns/zunpkd832.h
new file mode 100644
index 0000000..42b01ea
--- /dev/null
+++ b/riscv/insns/zunpkd832.h
@@ -0,0 +1 @@
+P_ZUNPKD8(3, 2) \ No newline at end of file
diff --git a/riscv/processor.cc b/riscv/processor.cc
index 12d6899..88ee0b7 100644
--- a/riscv/processor.cc
+++ b/riscv/processor.cc
@@ -210,7 +210,7 @@ void processor_t::parse_isa_string(const char* str)
char error_msg[256];
const char* p = lowercase.c_str();
- const char* all_subsets = "imafdqcbkh"
+ const char* all_subsets = "imafdqcbkhp"
#ifdef __SIZEOF_INT128__
"v"
#endif
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in
index 342ea56..055d6d1 100644
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@@ -907,6 +907,350 @@ riscv_insn_ext_h = \
hsv_w \
hsv_d \
+riscv_insn_ext_p_simd = \
+ add16 \
+ radd16 \
+ uradd16 \
+ kadd16 \
+ ukadd16 \
+ sub16 \
+ rsub16 \
+ ursub16 \
+ ksub16 \
+ uksub16 \
+ cras16 \
+ rcras16 \
+ urcras16 \
+ kcras16 \
+ ukcras16 \
+ crsa16 \
+ rcrsa16 \
+ urcrsa16 \
+ kcrsa16 \
+ ukcrsa16 \
+ stas16 \
+ rstas16 \
+ urstas16 \
+ kstas16 \
+ ukstas16 \
+ stsa16 \
+ rstsa16 \
+ urstsa16 \
+ kstsa16 \
+ ukstsa16 \
+ add8 \
+ radd8 \
+ uradd8 \
+ kadd8 \
+ ukadd8 \
+ sub8 \
+ rsub8 \
+ ursub8 \
+ ksub8 \
+ uksub8 \
+ sra16 \
+ srai16 \
+ sra16_u \
+ srai16_u \
+ srl16 \
+ srli16 \
+ srl16_u \
+ srli16_u \
+ sll16 \
+ slli16 \
+ ksll16 \
+ kslli16 \
+ kslra16 \
+ kslra16_u \
+ sra8 \
+ srai8 \
+ sra8_u \
+ srai8_u \
+ srl8 \
+ srli8 \
+ srl8_u \
+ srli8_u \
+ sll8 \
+ slli8 \
+ ksll8 \
+ kslli8 \
+ kslra8 \
+ kslra8_u \
+ cmpeq16 \
+ scmplt16 \
+ scmple16 \
+ ucmplt16 \
+ ucmple16 \
+ cmpeq8 \
+ scmplt8 \
+ scmple8 \
+ ucmplt8 \
+ ucmple8 \
+ smul16 \
+ smulx16 \
+ umul16 \
+ umulx16 \
+ khm16 \
+ khmx16 \
+ smul8 \
+ smulx8 \
+ umul8 \
+ umulx8 \
+ khm8 \
+ khmx8 \
+ smin16 \
+ umin16 \
+ smax16 \
+ umax16 \
+ sclip16 \
+ uclip16 \
+ kabs16 \
+ clrs16 \
+ clz16 \
+ clo16 \
+ swap16 \
+ smin8 \
+ umin8 \
+ smax8 \
+ umax8 \
+ sclip8 \
+ uclip8 \
+ kabs8 \
+ clrs8 \
+ clz8 \
+ clo8 \
+ swap8 \
+ sunpkd810 \
+ sunpkd820 \
+ sunpkd830 \
+ sunpkd831 \
+ sunpkd832 \
+ zunpkd810 \
+ zunpkd820 \
+ zunpkd830 \
+ zunpkd831 \
+ zunpkd832 \
+
+riscv_insn_ext_p_partial_simd = \
+ pkbb16 \
+ pkbt16 \
+ pktb16 \
+ pktt16 \
+ smmul \
+ smmul_u \
+ kmmac \
+ kmmac_u \
+ kmmsb \
+ kmmsb_u \
+ kwmmul \
+ kwmmul_u \
+ smmwb \
+ smmwb_u \
+ smmwt \
+ smmwt_u \
+ kmmawb \
+ kmmawb_u \
+ kmmawt \
+ kmmawt_u \
+ kmmwb2 \
+ kmmwb2_u \
+ kmmwt2 \
+ kmmwt2_u \
+ kmmawb2 \
+ kmmawb2_u \
+ kmmawt2 \
+ kmmawt2_u \
+ smbb16 \
+ smbt16 \
+ smtt16 \
+ kmda \
+ kmxda \
+ smds \
+ smdrs \
+ smxds \
+ kmabb \
+ kmabt \
+ kmatt \
+ kmada \
+ kmaxda \
+ kmads \
+ kmadrs \
+ kmaxds \
+ kmsda \
+ kmsxda \
+ smal \
+ sclip32 \
+ uclip32 \
+ clrs32 \
+ clz32 \
+ clo32 \
+ pbsad \
+ pbsada \
+ smaqa \
+ umaqa \
+ smaqa_su \
+
+riscv_insn_ext_p_64_bit_profile = \
+ add64 \
+ radd64 \
+ uradd64 \
+ kadd64 \
+ ukadd64 \
+ sub64 \
+ rsub64 \
+ ursub64 \
+ ksub64 \
+ uksub64 \
+ smar64 \
+ smsr64 \
+ umar64 \
+ umsr64 \
+ kmar64 \
+ kmsr64 \
+ ukmar64 \
+ ukmsr64 \
+ smalbb \
+ smalbt \
+ smaltt \
+ smalda \
+ smalxda \
+ smalds \
+ smaldrs \
+ smalxds \
+ smslda \
+ smslxda \
+
+riscv_insn_ext_p_non_simd = \
+ kaddh \
+ ksubh \
+ khmbb \
+ khmbt \
+ khmtt \
+ ukaddh \
+ uksubh \
+ kaddw \
+ ukaddw \
+ ksubw \
+ uksubw \
+ kdmbb \
+ kdmbt \
+ kdmtt \
+ kslraw \
+ kslraw_u \
+ ksllw \
+ kslliw \
+ kdmabb \
+ kdmabt \
+ kdmatt \
+ kabsw \
+ raddw \
+ uraddw \
+ rsubw \
+ ursubw \
+ maxw \
+ minw \
+ mulr64 \
+ mulsr64 \
+ msubr32 \
+ ave \
+ sra_u \
+ srai_u \
+ bitrev \
+ bitrevi \
+ wext \
+ wexti \
+ bpick \
+ insb \
+ maddr32 \
+
+riscv_insn_ext_p_rv64_only = \
+ add32 \
+ radd32 \
+ uradd32 \
+ kadd32 \
+ ukadd32 \
+ sub32 \
+ rsub32 \
+ ursub32 \
+ ksub32 \
+ uksub32 \
+ cras32 \
+ rcras32 \
+ urcras32 \
+ kcras32 \
+ ukcras32 \
+ crsa32 \
+ rcrsa32 \
+ urcrsa32 \
+ kcrsa32 \
+ ukcrsa32 \
+ stas32 \
+ rstas32 \
+ urstas32 \
+ kstas32 \
+ ukstas32 \
+ stsa32 \
+ rstsa32 \
+ urstsa32 \
+ kstsa32 \
+ ukstsa32 \
+ sra32 \
+ srai32 \
+ sra32_u \
+ srai32_u \
+ srl32 \
+ srli32 \
+ srl32_u \
+ srli32_u \
+ sll32 \
+ slli32 \
+ ksll32 \
+ kslli32 \
+ kslra32 \
+ kslra32_u \
+ smin32 \
+ umin32 \
+ smax32 \
+ umax32 \
+ kabs32 \
+ khmbb16 \
+ khmbt16 \
+ khmtt16 \
+ kdmbb16 \
+ kdmbt16 \
+ kdmtt16 \
+ kdmabb16 \
+ kdmabt16 \
+ kdmatt16 \
+ smbt32 \
+ smtt32 \
+ kmabb32 \
+ kmabt32 \
+ kmatt32 \
+ kmda32 \
+ kmxda32 \
+ kmaxda32 \
+ kmads32 \
+ kmadrs32 \
+ kmaxds32 \
+ kmsda32 \
+ kmsxda32 \
+ smds32 \
+ smdrs32 \
+ smxds32 \
+ sraiw_u \
+ pkbb32 \
+ pkbt32 \
+ pktb32 \
+ pktt32 \
+
+riscv_insn_ext_p = \
+ $(riscv_insn_ext_p_simd) \
+ $(riscv_insn_ext_p_partial_simd) \
+ $(riscv_insn_ext_p_64_bit_profile) \
+ $(riscv_insn_ext_p_non_simd) \
+ $(riscv_insn_ext_p_rv64_only) \
+
riscv_insn_priv = \
csrrc \
csrrci \
@@ -936,6 +1280,7 @@ riscv_insn_list = \
$(riscv_insn_ext_k) \
$(if $(HAVE_INT128),$(riscv_insn_ext_v),) \
$(riscv_insn_ext_h) \
+ $(riscv_insn_ext_p) \
$(riscv_insn_priv) \
riscv_gen_srcs = \