From f0984d4040c328d1c021ae6680479cbbe13c485b Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 17 Feb 2023 17:11:29 -0300 Subject: target/arm: move translate modules to tcg/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the target/arm/tcg directory. Its purpose is to hold the TCG code that is selected by CONFIG_TCG. Signed-off-by: Claudio Fontana Signed-off-by: Fabiano Rosas Reviewed-by: Richard Henderson Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Signed-off-by: Peter Maydell --- target/arm/tcg/mve.decode | 832 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 832 insertions(+) create mode 100644 target/arm/tcg/mve.decode (limited to 'target/arm/tcg/mve.decode') diff --git a/target/arm/tcg/mve.decode b/target/arm/tcg/mve.decode new file mode 100644 index 0000000..14a4f39 --- /dev/null +++ b/target/arm/tcg/mve.decode @@ -0,0 +1,832 @@ +# M-profile MVE instruction descriptions +# +# Copyright (c) 2021 Linaro, Ltd +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see . + +# +# This file is processed by scripts/decodetree.py +# + +%qd 22:1 13:3 +%qm 5:1 1:3 +%qn 7:1 17:3 + +# VQDMULL has size in bit 28: 0 for 16 bit, 1 for 32 bit +%size_28 28:1 !function=plus_1 + +# 2 operand fp insns have size in bit 20: 1 for 16 bit, 0 for 32 bit, +# like Neon FP insns. +%2op_fp_size 20:1 !function=neon_3same_fp_size +# VCADD is an exception, where bit 20 is 0 for 16 bit and 1 for 32 bit +%2op_fp_size_rev 20:1 !function=plus_1 +# FP scalars have size in bit 28, 1 for 16 bit, 0 for 32 bit +%2op_fp_scalar_size 28:1 !function=neon_3same_fp_size + +# 1imm format immediate +%imm_28_16_0 28:1 16:3 0:4 + +&vldr_vstr rn qd imm p a w size l u +&1op qd qm size +&2op qd qm qn size +&2scalar qd qn rm size +&1imm qd imm cmode op +&2shift qd qm shift size +&vidup qd rn size imm +&viwdup qd rn rm size imm +&vcmp qm qn size mask +&vcmp_scalar qn rm size mask +&shl_scalar qda rm size +&vmaxv qm rda size +&vabav qn qm rda size +&vldst_sg qd qm rn size msize os +&vldst_sg_imm qd qm a w imm +&vldst_il qd rn size pat w + +# scatter-gather memory size is in bits 6:4 +%sg_msize 6:1 4:1 + +@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0 +# Note that both Rn and Qd are 3 bits only (no D bit) +@vldst_wn ... u:1 ... . . . . l:1 . rn:3 qd:3 . ... .. imm:7 &vldr_vstr + +@vldst_sg .... .... .... rn:4 .... ... size:2 ... ... os:1 &vldst_sg \ + qd=%qd qm=%qm msize=%sg_msize + +# Qm is in the fields usually labeled Qn +@vldst_sg_imm .... .... a:1 . w:1 . .... .... .... . imm:7 &vldst_sg_imm \ + qd=%qd qm=%qn + +# Deinterleaving load/interleaving store +@vldst_il .... .... .. w:1 . rn:4 .... ... size:2 pat:2 ..... &vldst_il \ + qd=%qd + +@1op .... .... .... size:2 .. .... .... .... .... &1op qd=%qd qm=%qm +@1op_nosz .... .... .... .... .... .... .... .... &1op qd=%qd qm=%qm size=0 +@2op .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn +@2op_nosz .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn size=0 +@2op_sz28 .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn \ + size=%size_28 +@1imm .... .... .... .... .... cmode:4 .. op:1 . .... &1imm qd=%qd imm=%imm_28_16_0 + +# The _rev suffix indicates that Vn and Vm are reversed. This is +# the case for shifts. In the Arm ARM these insns are documented +# with the Vm and Vn fields in their usual places, but in the +# assembly the operands are listed "backwards", ie in the order +# Qd, Qm, Qn where other insns use Qd, Qn, Qm. For QEMU we choose +# to consider Vm and Vn as being in different fields in the insn. +# This gives us consistency with A64 and Neon. +@2op_rev .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qn qn=%qm + +@2scalar .... .... .. size:2 .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn +@2scalar_nosz .... .... .... .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn + +@2_shl_b .... .... .. 001 shift:3 .... .... .... .... &2shift qd=%qd qm=%qm size=0 +@2_shl_h .... .... .. 01 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1 +@2_shl_w .... .... .. 1 shift:5 .... .... .... .... &2shift qd=%qd qm=%qm size=2 + +@2_shll_b .... .... ... 01 shift:3 .... .... .... .... &2shift qd=%qd qm=%qm size=0 +@2_shll_h .... .... ... 1 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1 +# VSHLL encoding T2 where shift == esize +@2_shll_esize_b .... .... .... 00 .. .... .... .... .... &2shift \ + qd=%qd qm=%qm size=0 shift=8 +@2_shll_esize_h .... .... .... 01 .. .... .... .... .... &2shift \ + qd=%qd qm=%qm size=1 shift=16 + +# Right shifts are encoded as N - shift, where N is the element size in bits. +%rshift_i5 16:5 !function=rsub_32 +%rshift_i4 16:4 !function=rsub_16 +%rshift_i3 16:3 !function=rsub_8 + +@2_shr_b .... .... .. 001 ... .... .... .... .... &2shift qd=%qd qm=%qm \ + size=0 shift=%rshift_i3 +@2_shr_h .... .... .. 01 .... .... .... .... .... &2shift qd=%qd qm=%qm \ + size=1 shift=%rshift_i4 +@2_shr_w .... .... .. 1 ..... .... .... .... .... &2shift qd=%qd qm=%qm \ + size=2 shift=%rshift_i5 + +@shl_scalar .... .... .... size:2 .. .... .... .... rm:4 &shl_scalar qda=%qd + +# Vector comparison; 4-bit Qm but 3-bit Qn +%mask_22_13 22:1 13:3 +@vcmp .... .... .. size:2 qn:3 . .... .... .... .... &vcmp qm=%qm mask=%mask_22_13 +@vcmp_scalar .... .... .. size:2 qn:3 . .... .... .... rm:4 &vcmp_scalar \ + mask=%mask_22_13 + +@vcmp_fp .... .... .... qn:3 . .... .... .... .... &vcmp \ + qm=%qm size=%2op_fp_scalar_size mask=%mask_22_13 + +# Bit 28 is a 2op_fp_scalar_size bit, but we do not decode it in this +# format to avoid complicated overlapping-instruction-groups +@vcmp_fp_scalar .... .... .... qn:3 . .... .... .... rm:4 &vcmp_scalar \ + mask=%mask_22_13 + +@vmaxv .... .... .... size:2 .. rda:4 .... .... .... &vmaxv qm=%qm + +@2op_fp .... .... .... .... .... .... .... .... &2op \ + qd=%qd qn=%qn qm=%qm size=%2op_fp_size + +@2op_fp_size_rev .... .... .... .... .... .... .... .... &2op \ + qd=%qd qn=%qn qm=%qm size=%2op_fp_size_rev + +# 2-operand, but Qd and Qn share a field. Size is in bit 28, but we +# don't decode it in this format +@vmaxnma .... .... .... .... .... .... .... .... &2op \ + qd=%qd qn=%qd qm=%qm + +# Here also we don't decode the bit 28 size in the format to avoid +# awkward nested overlap groups +@vmaxnmv .... .... .... .... rda:4 .... .... .... &vmaxv qm=%qm + +@2op_fp_scalar .... .... .... .... .... .... .... rm:4 &2scalar \ + qd=%qd qn=%qn size=%2op_fp_scalar_size + +# Vector loads and stores + +# Widening loads and narrowing stores: +# for these P=0 W=0 is 'related encoding'; sz=11 is 'related encoding' +# This means we need to expand out to multiple patterns for P, W, SZ. +# For stores the U bit must be 0 but we catch that in the trans_ function. +# The naming scheme here is "VLDSTB_H == in-memory byte load/store to/from +# signed halfword element in register", etc. +VLDSTB_H 111 . 110 0 a:1 0 1 . 0 ... ... 0 111 01 ....... @vldst_wn \ + p=0 w=1 size=1 +VLDSTB_H 111 . 110 1 a:1 0 w:1 . 0 ... ... 0 111 01 ....... @vldst_wn \ + p=1 size=1 +VLDSTB_W 111 . 110 0 a:1 0 1 . 0 ... ... 0 111 10 ....... @vldst_wn \ + p=0 w=1 size=2 +VLDSTB_W 111 . 110 1 a:1 0 w:1 . 0 ... ... 0 111 10 ....... @vldst_wn \ + p=1 size=2 +VLDSTH_W 111 . 110 0 a:1 0 1 . 1 ... ... 0 111 10 ....... @vldst_wn \ + p=0 w=1 size=2 +VLDSTH_W 111 . 110 1 a:1 0 w:1 . 1 ... ... 0 111 10 ....... @vldst_wn \ + p=1 size=2 + +# Non-widening loads/stores (P=0 W=0 is 'related encoding') +VLDR_VSTR 1110110 0 a:1 . 1 . .... ... 111100 ....... @vldr_vstr \ + size=0 p=0 w=1 +VLDR_VSTR 1110110 0 a:1 . 1 . .... ... 111101 ....... @vldr_vstr \ + size=1 p=0 w=1 +VLDR_VSTR 1110110 0 a:1 . 1 . .... ... 111110 ....... @vldr_vstr \ + size=2 p=0 w=1 +VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111100 ....... @vldr_vstr \ + size=0 p=1 +VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111101 ....... @vldr_vstr \ + size=1 p=1 +VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111110 ....... @vldr_vstr \ + size=2 p=1 + +# gather loads/scatter stores +VLDR_S_sg 111 0 1100 1 . 01 .... ... 0 111 . .... .... @vldst_sg +VLDR_U_sg 111 1 1100 1 . 01 .... ... 0 111 . .... .... @vldst_sg +VSTR_sg 111 0 1100 1 . 00 .... ... 0 111 . .... .... @vldst_sg + +VLDRW_sg_imm 111 1 1101 ... 1 ... 0 ... 1 1110 .... .... @vldst_sg_imm +VLDRD_sg_imm 111 1 1101 ... 1 ... 0 ... 1 1111 .... .... @vldst_sg_imm +VSTRW_sg_imm 111 1 1101 ... 0 ... 0 ... 1 1110 .... .... @vldst_sg_imm +VSTRD_sg_imm 111 1 1101 ... 0 ... 0 ... 1 1111 .... .... @vldst_sg_imm + +# deinterleaving loads/interleaving stores +VLD2 1111 1100 1 .. 1 .... ... 1 111 .. .. 00000 @vldst_il +VLD4 1111 1100 1 .. 1 .... ... 1 111 .. .. 00001 @vldst_il +VST2 1111 1100 1 .. 0 .... ... 1 111 .. .. 00000 @vldst_il +VST4 1111 1100 1 .. 0 .... ... 1 111 .. .. 00001 @vldst_il + +# Moves between 2 32-bit vector lanes and 2 general purpose registers +VMOV_to_2gp 1110 1100 0 . 00 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd +VMOV_from_2gp 1110 1100 0 . 01 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd + +# Vector 2-op +VAND 1110 1111 0 . 00 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz +VBIC 1110 1111 0 . 01 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz +VORR 1110 1111 0 . 10 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz +VORN 1110 1111 0 . 11 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz +VEOR 1111 1111 0 . 00 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz + +VADD 1110 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op +VSUB 1111 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op +VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op + +# The VSHLL T2 encoding is not a @2op pattern, but is here because it +# overlaps what would be size=0b11 VMULH/VRMULH +{ + VCVTB_SH 111 0 1110 0 . 11 1111 ... 0 1110 0 0 . 0 ... 1 @1op_nosz + + VMAXNMA 111 0 1110 0 . 11 1111 ... 0 1110 1 0 . 0 ... 1 @vmaxnma size=2 + + VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b + VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h + + VQMOVUNB 111 0 1110 0 . 11 .. 01 ... 0 1110 1 0 . 0 ... 1 @1op + VQMOVN_BS 111 0 1110 0 . 11 .. 11 ... 0 1110 0 0 . 0 ... 1 @1op + + VMAXA 111 0 1110 0 . 11 .. 11 ... 0 1110 1 0 . 0 ... 1 @1op + + VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op +} + +{ + VCVTB_HS 111 1 1110 0 . 11 1111 ... 0 1110 0 0 . 0 ... 1 @1op_nosz + + VMAXNMA 111 1 1110 0 . 11 1111 ... 0 1110 1 0 . 0 ... 1 @vmaxnma size=1 + + VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b + VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h + + VMOVNB 111 1 1110 0 . 11 .. 01 ... 0 1110 1 0 . 0 ... 1 @1op + VQMOVN_BU 111 1 1110 0 . 11 .. 11 ... 0 1110 0 0 . 0 ... 1 @1op + + VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op +} + +{ + VCVTT_SH 111 0 1110 0 . 11 1111 ... 1 1110 0 0 . 0 ... 1 @1op_nosz + + VMINNMA 111 0 1110 0 . 11 1111 ... 1 1110 1 0 . 0 ... 1 @vmaxnma size=2 + VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b + VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h + + VQMOVUNT 111 0 1110 0 . 11 .. 01 ... 1 1110 1 0 . 0 ... 1 @1op + VQMOVN_TS 111 0 1110 0 . 11 .. 11 ... 1 1110 0 0 . 0 ... 1 @1op + + VMINA 111 0 1110 0 . 11 .. 11 ... 1 1110 1 0 . 0 ... 1 @1op + + VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op +} + +{ + VCVTT_HS 111 1 1110 0 . 11 1111 ... 1 1110 0 0 . 0 ... 1 @1op_nosz + + VMINNMA 111 1 1110 0 . 11 1111 ... 1 1110 1 0 . 0 ... 1 @vmaxnma size=1 + VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b + VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h + + VMOVNT 111 1 1110 0 . 11 .. 01 ... 1 1110 1 0 . 0 ... 1 @1op + VQMOVN_TU 111 1 1110 0 . 11 .. 11 ... 1 1110 0 0 . 0 ... 1 @1op + + VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op +} + +VMAX_S 111 0 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op +VMAX_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op +VMIN_S 111 0 1111 0 . .. ... 0 ... 0 0110 . 1 . 1 ... 0 @2op +VMIN_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 1 ... 0 @2op + +VABD_S 111 0 1111 0 . .. ... 0 ... 0 0111 . 1 . 0 ... 0 @2op +VABD_U 111 1 1111 0 . .. ... 0 ... 0 0111 . 1 . 0 ... 0 @2op + +VHADD_S 111 0 1111 0 . .. ... 0 ... 0 0000 . 1 . 0 ... 0 @2op +VHADD_U 111 1 1111 0 . .. ... 0 ... 0 0000 . 1 . 0 ... 0 @2op +VHSUB_S 111 0 1111 0 . .. ... 0 ... 0 0010 . 1 . 0 ... 0 @2op +VHSUB_U 111 1 1111 0 . .. ... 0 ... 0 0010 . 1 . 0 ... 0 @2op + +{ + VMULLP_B 111 . 1110 0 . 11 ... 1 ... 0 1110 . 0 . 0 ... 0 @2op_sz28 + VMULL_BS 111 0 1110 0 . .. ... 1 ... 0 1110 . 0 . 0 ... 0 @2op + VMULL_BU 111 1 1110 0 . .. ... 1 ... 0 1110 . 0 . 0 ... 0 @2op +} +{ + VMULLP_T 111 . 1110 0 . 11 ... 1 ... 1 1110 . 0 . 0 ... 0 @2op_sz28 + VMULL_TS 111 0 1110 0 . .. ... 1 ... 1 1110 . 0 . 0 ... 0 @2op + VMULL_TU 111 1 1110 0 . .. ... 1 ... 1 1110 . 0 . 0 ... 0 @2op +} + +VQDMULH 1110 1111 0 . .. ... 0 ... 0 1011 . 1 . 0 ... 0 @2op +VQRDMULH 1111 1111 0 . .. ... 0 ... 0 1011 . 1 . 0 ... 0 @2op + +VQADD_S 111 0 1111 0 . .. ... 0 ... 0 0000 . 1 . 1 ... 0 @2op +VQADD_U 111 1 1111 0 . .. ... 0 ... 0 0000 . 1 . 1 ... 0 @2op +VQSUB_S 111 0 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op +VQSUB_U 111 1 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op + +VSHL_S 111 0 1111 0 . .. ... 0 ... 0 0100 . 1 . 0 ... 0 @2op_rev +VSHL_U 111 1 1111 0 . .. ... 0 ... 0 0100 . 1 . 0 ... 0 @2op_rev + +VRSHL_S 111 0 1111 0 . .. ... 0 ... 0 0101 . 1 . 0 ... 0 @2op_rev +VRSHL_U 111 1 1111 0 . .. ... 0 ... 0 0101 . 1 . 0 ... 0 @2op_rev + +VQSHL_S 111 0 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev +VQSHL_U 111 1 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev + +VQRSHL_S 111 0 1111 0 . .. ... 0 ... 0 0101 . 1 . 1 ... 0 @2op_rev +VQRSHL_U 111 1 1111 0 . .. ... 0 ... 0 0101 . 1 . 1 ... 0 @2op_rev + +{ + VCMUL0 111 . 1110 0 . 11 ... 0 ... 0 1110 . 0 . 0 ... 0 @2op_sz28 + VQDMLADH 1110 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 0 @2op + VQDMLSDH 1111 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 0 @2op +} + +{ + VCMUL180 111 . 1110 0 . 11 ... 0 ... 1 1110 . 0 . 0 ... 0 @2op_sz28 + VQDMLADHX 111 0 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 0 @2op + VQDMLSDHX 111 1 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 0 @2op +} + +{ + VCMUL90 111 . 1110 0 . 11 ... 0 ... 0 1110 . 0 . 0 ... 1 @2op_sz28 + VQRDMLADH 111 0 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 1 @2op + VQRDMLSDH 111 1 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 1 @2op +} + +{ + VCMUL270 111 . 1110 0 . 11 ... 0 ... 1 1110 . 0 . 0 ... 1 @2op_sz28 + VQRDMLADHX 111 0 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 1 @2op + VQRDMLSDHX 111 1 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 1 @2op +} + +VQDMULLB 111 . 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 1 @2op_sz28 +VQDMULLT 111 . 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 1 @2op_sz28 + +VRHADD_S 111 0 1111 0 . .. ... 0 ... 0 0001 . 1 . 0 ... 0 @2op +VRHADD_U 111 1 1111 0 . .. ... 0 ... 0 0001 . 1 . 0 ... 0 @2op + +{ + VADC 1110 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 0 @2op_nosz + VADCI 1110 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 0 @2op_nosz + VHCADD90 1110 1110 0 . .. ... 0 ... 0 1111 . 0 . 0 ... 0 @2op + VHCADD270 1110 1110 0 . .. ... 0 ... 1 1111 . 0 . 0 ... 0 @2op +} + +{ + VSBC 1111 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 0 @2op_nosz + VSBCI 1111 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 0 @2op_nosz + VCADD90 1111 1110 0 . .. ... 0 ... 0 1111 . 0 . 0 ... 0 @2op + VCADD270 1111 1110 0 . .. ... 0 ... 1 1111 . 0 . 0 ... 0 @2op +} + +# Vector miscellaneous + +VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op +VCLZ 1111 1111 1 . 11 .. 00 ... 0 0100 11 . 0 ... 0 @1op + +VREV16 1111 1111 1 . 11 .. 00 ... 0 0001 01 . 0 ... 0 @1op +VREV32 1111 1111 1 . 11 .. 00 ... 0 0000 11 . 0 ... 0 @1op +VREV64 1111 1111 1 . 11 .. 00 ... 0 0000 01 . 0 ... 0 @1op + +VMVN 1111 1111 1 . 11 00 00 ... 0 0101 11 . 0 ... 0 @1op_nosz + +VABS 1111 1111 1 . 11 .. 01 ... 0 0011 01 . 0 ... 0 @1op +VABS_fp 1111 1111 1 . 11 .. 01 ... 0 0111 01 . 0 ... 0 @1op +VNEG 1111 1111 1 . 11 .. 01 ... 0 0011 11 . 0 ... 0 @1op +VNEG_fp 1111 1111 1 . 11 .. 01 ... 0 0111 11 . 0 ... 0 @1op + +VQABS 1111 1111 1 . 11 .. 00 ... 0 0111 01 . 0 ... 0 @1op +VQNEG 1111 1111 1 . 11 .. 00 ... 0 0111 11 . 0 ... 0 @1op + +&vdup qd rt size +# Qd is in the fields usually named Qn +@vdup .... .... . . .. ... . rt:4 .... . . . . .... qd=%qn &vdup + +# B and E bits encode size, which we decode here to the usual size values +VDUP 1110 1110 1 1 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=0 +VDUP 1110 1110 1 0 10 ... 0 .... 1011 . 0 1 1 0000 @vdup size=1 +VDUP 1110 1110 1 0 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=2 + +# Incrementing and decrementing dup + +# VIDUP, VDDUP format immediate: 1 << (immh:imml) +%imm_vidup 7:1 0:1 !function=vidup_imm + +# VIDUP, VDDUP registers: Rm bits [3:1] from insn, bit 0 is 1; +# Rn bits [3:1] from insn, bit 0 is 0 +%vidup_rm 1:3 !function=times_2_plus_1 +%vidup_rn 17:3 !function=times_2 + +@vidup .... .... . . size:2 .... .... .... .... .... \ + qd=%qd imm=%imm_vidup rn=%vidup_rn &vidup +@viwdup .... .... . . size:2 .... .... .... .... .... \ + qd=%qd imm=%imm_vidup rm=%vidup_rm rn=%vidup_rn &viwdup +{ + VIDUP 1110 1110 0 . .. ... 1 ... 0 1111 . 110 111 . @vidup + VIWDUP 1110 1110 0 . .. ... 1 ... 0 1111 . 110 ... . @viwdup +} +{ + VCMPGT_fp_scalar 1110 1110 0 . 11 ... 1 ... 1 1111 0110 .... @vcmp_fp_scalar size=2 + VCMPLE_fp_scalar 1110 1110 0 . 11 ... 1 ... 1 1111 1110 .... @vcmp_fp_scalar size=2 + VDDUP 1110 1110 0 . .. ... 1 ... 1 1111 . 110 111 . @vidup + VDWDUP 1110 1110 0 . .. ... 1 ... 1 1111 . 110 ... . @viwdup +} + +# multiply-add long dual accumulate +# rdahi: bits [3:1] from insn, bit 0 is 1 +# rdalo: bits [3:1] from insn, bit 0 is 0 +%rdahi 20:3 !function=times_2_plus_1 +%rdalo 13:3 !function=times_2 +# size bit is 0 for 16 bit, 1 for 32 bit +%size_16 16:1 !function=plus_1 + +&vmlaldav rdahi rdalo size qn qm x a +&vmladav rda size qn qm x a + +@vmlaldav .... .... . ... ... . ... x:1 .... .. a:1 . qm:3 . \ + qn=%qn rdahi=%rdahi rdalo=%rdalo size=%size_16 &vmlaldav +@vmlaldav_nosz .... .... . ... ... . ... x:1 .... .. a:1 . qm:3 . \ + qn=%qn rdahi=%rdahi rdalo=%rdalo size=0 &vmlaldav +@vmladav .... .... .... ... . ... x:1 .... . . a:1 . qm:3 . \ + qn=%qn rda=%rdalo size=%size_16 &vmladav +@vmladav_nosz .... .... .... ... . ... x:1 .... . . a:1 . qm:3 . \ + qn=%qn rda=%rdalo size=0 &vmladav + +{ + VMLADAV_S 1110 1110 1111 ... . ... . 1110 . 0 . 0 ... 0 @vmladav + VMLALDAV_S 1110 1110 1 ... ... . ... . 1110 . 0 . 0 ... 0 @vmlaldav +} +{ + VMLADAV_U 1111 1110 1111 ... . ... . 1110 . 0 . 0 ... 0 @vmladav + VMLALDAV_U 1111 1110 1 ... ... . ... . 1110 . 0 . 0 ... 0 @vmlaldav +} + +{ + VMLSDAV 1110 1110 1111 ... . ... . 1110 . 0 . 0 ... 1 @vmladav + VMLSLDAV 1110 1110 1 ... ... . ... . 1110 . 0 . 0 ... 1 @vmlaldav +} + +{ + VMLSDAV 1111 1110 1111 ... 0 ... . 1110 . 0 . 0 ... 1 @vmladav_nosz + VRMLSLDAVH 1111 1110 1 ... ... 0 ... . 1110 . 0 . 0 ... 1 @vmlaldav_nosz +} + +VMLADAV_S 1110 1110 1111 ... 0 ... . 1111 . 0 . 0 ... 1 @vmladav_nosz +VMLADAV_U 1111 1110 1111 ... 0 ... . 1111 . 0 . 0 ... 1 @vmladav_nosz + +{ + [ + VMAXNMAV 1110 1110 1110 11 00 .... 1111 0 0 . 0 ... 0 @vmaxnmv size=2 + VMINNMAV 1110 1110 1110 11 00 .... 1111 1 0 . 0 ... 0 @vmaxnmv size=2 + VMAXNMV 1110 1110 1110 11 10 .... 1111 0 0 . 0 ... 0 @vmaxnmv size=2 + VMINNMV 1110 1110 1110 11 10 .... 1111 1 0 . 0 ... 0 @vmaxnmv size=2 + ] + [ + VMAXV_S 1110 1110 1110 .. 10 .... 1111 0 0 . 0 ... 0 @vmaxv + VMINV_S 1110 1110 1110 .. 10 .... 1111 1 0 . 0 ... 0 @vmaxv + VMAXAV 1110 1110 1110 .. 00 .... 1111 0 0 . 0 ... 0 @vmaxv + VMINAV 1110 1110 1110 .. 00 .... 1111 1 0 . 0 ... 0 @vmaxv + ] + VMLADAV_S 1110 1110 1111 ... 0 ... . 1111 . 0 . 0 ... 0 @vmladav_nosz + VRMLALDAVH_S 1110 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz +} + +{ + [ + VMAXNMAV 1111 1110 1110 11 00 .... 1111 0 0 . 0 ... 0 @vmaxnmv size=1 + VMINNMAV 1111 1110 1110 11 00 .... 1111 1 0 . 0 ... 0 @vmaxnmv size=1 + VMAXNMV 1111 1110 1110 11 10 .... 1111 0 0 . 0 ... 0 @vmaxnmv size=1 + VMINNMV 1111 1110 1110 11 10 .... 1111 1 0 . 0 ... 0 @vmaxnmv size=1 + ] + [ + VMAXV_U 1111 1110 1110 .. 10 .... 1111 0 0 . 0 ... 0 @vmaxv + VMINV_U 1111 1110 1110 .. 10 .... 1111 1 0 . 0 ... 0 @vmaxv + ] + VMLADAV_U 1111 1110 1111 ... 0 ... . 1111 . 0 . 0 ... 0 @vmladav_nosz + VRMLALDAVH_U 1111 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz +} + +# Scalar operations + +{ + VCMPEQ_fp_scalar 1110 1110 0 . 11 ... 1 ... 0 1111 0100 .... @vcmp_fp_scalar size=2 + VCMPNE_fp_scalar 1110 1110 0 . 11 ... 1 ... 0 1111 1100 .... @vcmp_fp_scalar size=2 + VADD_scalar 1110 1110 0 . .. ... 1 ... 0 1111 . 100 .... @2scalar +} + +{ + VCMPLT_fp_scalar 1110 1110 0 . 11 ... 1 ... 1 1111 1100 .... @vcmp_fp_scalar size=2 + VCMPGE_fp_scalar 1110 1110 0 . 11 ... 1 ... 1 1111 0100 .... @vcmp_fp_scalar size=2 + VSUB_scalar 1110 1110 0 . .. ... 1 ... 1 1111 . 100 .... @2scalar +} + +{ + VSHL_S_scalar 1110 1110 0 . 11 .. 01 ... 1 1110 0110 .... @shl_scalar + VRSHL_S_scalar 1110 1110 0 . 11 .. 11 ... 1 1110 0110 .... @shl_scalar + VQSHL_S_scalar 1110 1110 0 . 11 .. 01 ... 1 1110 1110 .... @shl_scalar + VQRSHL_S_scalar 1110 1110 0 . 11 .. 11 ... 1 1110 1110 .... @shl_scalar + VMUL_scalar 1110 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar +} + +{ + VSHL_U_scalar 1111 1110 0 . 11 .. 01 ... 1 1110 0110 .... @shl_scalar + VRSHL_U_scalar 1111 1110 0 . 11 .. 11 ... 1 1110 0110 .... @shl_scalar + VQSHL_U_scalar 1111 1110 0 . 11 .. 01 ... 1 1110 1110 .... @shl_scalar + VQRSHL_U_scalar 1111 1110 0 . 11 .. 11 ... 1 1110 1110 .... @shl_scalar + VBRSR 1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar +} + +{ + VADD_fp_scalar 111 . 1110 0 . 11 ... 0 ... 0 1111 . 100 .... @2op_fp_scalar + VHADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar + VHADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar +} + +{ + VSUB_fp_scalar 111 . 1110 0 . 11 ... 0 ... 1 1111 . 100 .... @2op_fp_scalar + VHSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar + VHSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar +} + +{ + VQADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar + VQADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar + VQDMULLB_scalar 111 . 1110 0 . 11 ... 0 ... 0 1111 . 110 .... @2scalar_nosz \ + size=%size_28 +} + +{ + VQSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar + VQSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar + VQDMULLT_scalar 111 . 1110 0 . 11 ... 0 ... 1 1111 . 110 .... @2scalar_nosz \ + size=%size_28 +} + +{ + VMUL_fp_scalar 111 . 1110 0 . 11 ... 1 ... 0 1110 . 110 .... @2op_fp_scalar + VQDMULH_scalar 1110 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar + VQRDMULH_scalar 1111 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar +} + +{ + VFMA_scalar 111 . 1110 0 . 11 ... 1 ... 0 1110 . 100 .... @2op_fp_scalar + # The U bit (28) is don't-care because it does not affect the result + VMLA 111 - 1110 0 . .. ... 1 ... 0 1110 . 100 .... @2scalar +} + +{ + VFMAS_scalar 111 . 1110 0 . 11 ... 1 ... 1 1110 . 100 .... @2op_fp_scalar + # The U bit (28) is don't-care because it does not affect the result + VMLAS 111 - 1110 0 . .. ... 1 ... 1 1110 . 100 .... @2scalar +} + +VQRDMLAH 1110 1110 0 . .. ... 0 ... 0 1110 . 100 .... @2scalar +VQRDMLASH 1110 1110 0 . .. ... 0 ... 1 1110 . 100 .... @2scalar +VQDMLAH 1110 1110 0 . .. ... 0 ... 0 1110 . 110 .... @2scalar +VQDMLASH 1110 1110 0 . .. ... 0 ... 1 1110 . 110 .... @2scalar + +# Vector add across vector +{ + VADDV 111 u:1 1110 1111 size:2 01 ... 0 1111 0 0 a:1 0 qm:3 0 rda=%rdalo + VADDLV 111 u:1 1110 1 ... 1001 ... 0 1111 00 a:1 0 qm:3 0 \ + rdahi=%rdahi rdalo=%rdalo +} + +@vabav .... .... .. size:2 .... rda:4 .... .... .... &vabav qn=%qn qm=%qm + +VABAV_S 111 0 1110 10 .. ... 0 .... 1111 . 0 . 0 ... 1 @vabav +VABAV_U 111 1 1110 10 .. ... 0 .... 1111 . 0 . 0 ... 1 @vabav + +# Logical immediate operations (1 reg and modified-immediate) + +# The cmode/op bits here decode VORR/VBIC/VMOV/VMVN, but +# not in a way we can conveniently represent in decodetree without +# a lot of repetition: +# VORR: op=0, (cmode & 1) && cmode < 12 +# VBIC: op=1, (cmode & 1) && cmode < 12 +# VMOV: everything else +# So we have a single decode line and check the cmode/op in the +# trans function. +Vimm_1r 111 . 1111 1 . 00 0 ... ... 0 .... 0 1 . 1 .... @1imm + +# Shifts by immediate + +VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_b +VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_h +VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_w + +VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_b +VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_h +VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_w + +VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_b +VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_h +VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_w + +VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_b +VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_h +VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_w + +VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_b +VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_h +VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_w + +VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_b +VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_h +VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_w + +VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b +VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h +VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w + +VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b +VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h +VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w + +# VSHLL T1 encoding; the T2 VSHLL encoding is elsewhere in this file +# Note that VMOVL is encoded as "VSHLL with a zero shift count"; we +# implement it that way rather than special-casing it in the decode. +VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b +VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h + +VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b +VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h + +VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b +VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h + +VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b +VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h + +# Shift-and-insert +VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_b +VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_h +VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_w + +VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_b +VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_h +VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_w + +# Narrowing shifts (which only support b and h sizes) +VSHRNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_b +VSHRNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_h +VSHRNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_b +VSHRNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_h + +VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_b +VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_h +VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_b +VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_h + +VQSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b +VQSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h +VQSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b +VQSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h +VQSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b +VQSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h +VQSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b +VQSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h + +VQSHRUNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b +VQSHRUNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h +VQSHRUNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b +VQSHRUNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h + +VQRSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b +VQRSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h +VQRSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b +VQRSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h +VQRSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b +VQRSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h +VQRSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b +VQRSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h + +VQRSHRUNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b +VQRSHRUNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h +VQRSHRUNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b +VQRSHRUNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h + +VSHLC 111 0 1110 1 . 1 imm:5 ... 0 1111 1100 rdm:4 qd=%qd + +# Comparisons. We expand out the conditions which are split across +# encodings T1, T2, T3 and the fc bits. These include VPT, which is +# effectively "VCMP then VPST". A plain "VCMP" has a mask field of zero. +{ + VCMPEQ_fp 111 . 1110 0 . 11 ... 1 ... 0 1111 0 0 . 0 ... 0 @vcmp_fp + VCMPEQ 111 1 1110 0 . .. ... 1 ... 0 1111 0 0 . 0 ... 0 @vcmp +} + +{ + VCMPNE_fp 111 . 1110 0 . 11 ... 1 ... 0 1111 1 0 . 0 ... 0 @vcmp_fp + VCMPNE 111 1 1110 0 . .. ... 1 ... 0 1111 1 0 . 0 ... 0 @vcmp +} + +{ + VCMPGE_fp 111 . 1110 0 . 11 ... 1 ... 1 1111 0 0 . 0 ... 0 @vcmp_fp + VCMPGE 111 1 1110 0 . .. ... 1 ... 1 1111 0 0 . 0 ... 0 @vcmp +} + +{ + VCMPLT_fp 111 . 1110 0 . 11 ... 1 ... 1 1111 1 0 . 0 ... 0 @vcmp_fp + VCMPLT 111 1 1110 0 . .. ... 1 ... 1 1111 1 0 . 0 ... 0 @vcmp +} + +{ + VCMPGT_fp 111 . 1110 0 . 11 ... 1 ... 1 1111 0 0 . 0 ... 1 @vcmp_fp + VCMPGT 111 1 1110 0 . .. ... 1 ... 1 1111 0 0 . 0 ... 1 @vcmp +} + +{ + VCMPLE_fp 111 . 1110 0 . 11 ... 1 ... 1 1111 1 0 . 0 ... 1 @vcmp_fp + VCMPLE 1111 1110 0 . .. ... 1 ... 1 1111 1 0 . 0 ... 1 @vcmp +} + +{ + VPSEL 1111 1110 0 . 11 ... 1 ... 0 1111 . 0 . 0 ... 1 @2op_nosz + VCMPCS 1111 1110 0 . .. ... 1 ... 0 1111 0 0 . 0 ... 1 @vcmp + VCMPHI 1111 1110 0 . .. ... 1 ... 0 1111 1 0 . 0 ... 1 @vcmp +} + +{ + VPNOT 1111 1110 0 0 11 000 1 000 0 1111 0100 1101 + VPST 1111 1110 0 . 11 000 1 ... 0 1111 0100 1101 mask=%mask_22_13 + VCMPEQ_fp_scalar 1111 1110 0 . 11 ... 1 ... 0 1111 0100 .... @vcmp_fp_scalar size=1 + VCMPEQ_scalar 1111 1110 0 . .. ... 1 ... 0 1111 0100 .... @vcmp_scalar +} + +{ + VCMPNE_fp_scalar 1111 1110 0 . 11 ... 1 ... 0 1111 1100 .... @vcmp_fp_scalar size=1 + VCMPNE_scalar 1111 1110 0 . .. ... 1 ... 0 1111 1100 .... @vcmp_scalar +} + +{ + VCMPGT_fp_scalar 1111 1110 0 . 11 ... 1 ... 1 1111 0110 .... @vcmp_fp_scalar size=1 + VCMPGT_scalar 1111 1110 0 . .. ... 1 ... 1 1111 0110 .... @vcmp_scalar +} + +{ + VCMPLE_fp_scalar 1111 1110 0 . 11 ... 1 ... 1 1111 1110 .... @vcmp_fp_scalar size=1 + VCMPLE_scalar 1111 1110 0 . .. ... 1 ... 1 1111 1110 .... @vcmp_scalar +} + +{ + VCMPGE_fp_scalar 1111 1110 0 . 11 ... 1 ... 1 1111 0100 .... @vcmp_fp_scalar size=1 + VCMPGE_scalar 1111 1110 0 . .. ... 1 ... 1 1111 0100 .... @vcmp_scalar +} +{ + VCMPLT_fp_scalar 1111 1110 0 . 11 ... 1 ... 1 1111 1100 .... @vcmp_fp_scalar size=1 + VCMPLT_scalar 1111 1110 0 . .. ... 1 ... 1 1111 1100 .... @vcmp_scalar +} + +VCMPCS_scalar 1111 1110 0 . .. ... 1 ... 0 1111 0 1 1 0 .... @vcmp_scalar +VCMPHI_scalar 1111 1110 0 . .. ... 1 ... 0 1111 1 1 1 0 .... @vcmp_scalar + +# 2-operand FP +VADD_fp 1110 1111 0 . 0 . ... 0 ... 0 1101 . 1 . 0 ... 0 @2op_fp +VSUB_fp 1110 1111 0 . 1 . ... 0 ... 0 1101 . 1 . 0 ... 0 @2op_fp +VMUL_fp 1111 1111 0 . 0 . ... 0 ... 0 1101 . 1 . 1 ... 0 @2op_fp +VABD_fp 1111 1111 0 . 1 . ... 0 ... 0 1101 . 1 . 0 ... 0 @2op_fp + +VMAXNM 1111 1111 0 . 0 . ... 0 ... 0 1111 . 1 . 1 ... 0 @2op_fp +VMINNM 1111 1111 0 . 1 . ... 0 ... 0 1111 . 1 . 1 ... 0 @2op_fp + +VCADD90_fp 1111 1100 1 . 0 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev +VCADD270_fp 1111 1101 1 . 0 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev + +VFMA 1110 1111 0 . 0 . ... 0 ... 0 1100 . 1 . 1 ... 0 @2op_fp +VFMS 1110 1111 0 . 1 . ... 0 ... 0 1100 . 1 . 1 ... 0 @2op_fp + +VCMLA0 1111 110 00 . 1 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev +VCMLA90 1111 110 01 . 1 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev +VCMLA180 1111 110 10 . 1 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev +VCMLA270 1111 110 11 . 1 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev + +# floating-point <-> fixed-point conversions. Naming convention: +# VCVT_, S = signed int, U = unsigned int, H = halfprec, F = singleprec +@vcvt .... .... .. 1 ..... .... .. 1 . .... .... &2shift \ + qd=%qd qm=%qm shift=%rshift_i5 size=2 +@vcvt_f16 .... .... .. 11 .... .... .. 0 . .... .... &2shift \ + qd=%qd qm=%qm shift=%rshift_i4 size=1 + +VCVT_SH_fixed 1110 1111 1 . ...... ... 0 11 . 0 01 . 1 ... 0 @vcvt_f16 +VCVT_UH_fixed 1111 1111 1 . ...... ... 0 11 . 0 01 . 1 ... 0 @vcvt_f16 + +VCVT_HS_fixed 1110 1111 1 . ...... ... 0 11 . 1 01 . 1 ... 0 @vcvt_f16 +VCVT_HU_fixed 1111 1111 1 . ...... ... 0 11 . 1 01 . 1 ... 0 @vcvt_f16 + +VCVT_SF_fixed 1110 1111 1 . ...... ... 0 11 . 0 01 . 1 ... 0 @vcvt +VCVT_UF_fixed 1111 1111 1 . ...... ... 0 11 . 0 01 . 1 ... 0 @vcvt + +VCVT_FS_fixed 1110 1111 1 . ...... ... 0 11 . 1 01 . 1 ... 0 @vcvt +VCVT_FU_fixed 1111 1111 1 . ...... ... 0 11 . 1 01 . 1 ... 0 @vcvt + +# VCVT between floating point and integer (halfprec and single); +# VCVT_, S = signed int, U = unsigned int, F = float +VCVT_SF 1111 1111 1 . 11 .. 11 ... 0 011 00 1 . 0 ... 0 @1op +VCVT_UF 1111 1111 1 . 11 .. 11 ... 0 011 01 1 . 0 ... 0 @1op +VCVT_FS 1111 1111 1 . 11 .. 11 ... 0 011 10 1 . 0 ... 0 @1op +VCVT_FU 1111 1111 1 . 11 .. 11 ... 0 011 11 1 . 0 ... 0 @1op + +# VCVT from floating point to integer with specified rounding mode +VCVTAS 1111 1111 1 . 11 .. 11 ... 000 00 0 1 . 0 ... 0 @1op +VCVTAU 1111 1111 1 . 11 .. 11 ... 000 00 1 1 . 0 ... 0 @1op +VCVTNS 1111 1111 1 . 11 .. 11 ... 000 01 0 1 . 0 ... 0 @1op +VCVTNU 1111 1111 1 . 11 .. 11 ... 000 01 1 1 . 0 ... 0 @1op +VCVTPS 1111 1111 1 . 11 .. 11 ... 000 10 0 1 . 0 ... 0 @1op +VCVTPU 1111 1111 1 . 11 .. 11 ... 000 10 1 1 . 0 ... 0 @1op +VCVTMS 1111 1111 1 . 11 .. 11 ... 000 11 0 1 . 0 ... 0 @1op +VCVTMU 1111 1111 1 . 11 .. 11 ... 000 11 1 1 . 0 ... 0 @1op + +VRINTN 1111 1111 1 . 11 .. 10 ... 001 000 1 . 0 ... 0 @1op +VRINTX 1111 1111 1 . 11 .. 10 ... 001 001 1 . 0 ... 0 @1op +VRINTA 1111 1111 1 . 11 .. 10 ... 001 010 1 . 0 ... 0 @1op +VRINTZ 1111 1111 1 . 11 .. 10 ... 001 011 1 . 0 ... 0 @1op +VRINTM 1111 1111 1 . 11 .. 10 ... 001 101 1 . 0 ... 0 @1op +VRINTP 1111 1111 1 . 11 .. 10 ... 001 111 1 . 0 ... 0 @1op -- cgit v1.1