path: root/gcc/config/aarch64/aarch64-sve2.md
author     Richard Sandiford <richard.sandiford@arm.com>   2020-01-09 16:36:42 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>        2020-01-09 16:36:42 +0000
commit     0a09a9483825233f16e5b26bb0ffee76752339fc (patch)
tree       fe83aaee3f4299895706f7e0e40af7ebc6ab3d6d /gcc/config/aarch64/aarch64-sve2.md
parent     f3582fda783496cc268467973c2c9860cd159b3d (diff)
download   gcc-0a09a9483825233f16e5b26bb0ffee76752339fc.zip
           gcc-0a09a9483825233f16e5b26bb0ffee76752339fc.tar.gz
           gcc-0a09a9483825233f16e5b26bb0ffee76752339fc.tar.bz2
[AArch64] Add support for the SVE2 ACLE
This patch adds support for the SVE2 ACLE. The implementation and tests follow the same pattern as the existing SVE ACLE support.

2020-01-09  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* config.gcc (aarch64*-*-*): Add aarch64-sve-builtins-sve2.o to extra_objs.
* config/aarch64/t-aarch64 (aarch64-sve-builtins.o): Depend on aarch64-sve-builtins-base.def, aarch64-sve-builtins-sve2.def and aarch64-sve-builtins-sve2.h.
(aarch64-sve-builtins-sve2.o): New rule.
* config/aarch64/aarch64.h (AARCH64_ISA_SVE2_AES): New macro.
(AARCH64_ISA_SVE2_BITPERM, AARCH64_ISA_SVE2_SHA3): Likewise.
(AARCH64_ISA_SVE2_SM4, TARGET_SVE2_AES, TARGET_SVE2_BITPERM): Likewise.
(TARGET_SVE2_SHA, TARGET_SVE2_SM4): Likewise.
* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Handle TARGET_SVE2_AES, TARGET_SVE2_BITPERM, TARGET_SVE2_SHA3 and TARGET_SVE2_SM4.
* config/aarch64/aarch64-sve.md: Update comments with SVE2 instructions that are handled here.
(@cond_asrd<mode>): Generalize to...
(@cond_<SVE_INT_SHIFT_IMM:sve_int_op><mode>): ...this.
(*cond_asrd<mode>_2): Generalize to...
(*cond_<SVE_INT_SHIFT_IMM:sve_int_op><mode>_2): ...this.
(*cond_asrd<mode>_z): Generalize to...
(*cond_<SVE_INT_SHIFT_IMM:sve_int_op><mode>_z): ...this.
* config/aarch64/aarch64.md (UNSPEC_LDNT1_GATHER): New unspec.
(UNSPEC_STNT1_SCATTER, UNSPEC_WHILEGE, UNSPEC_WHILEGT): Likewise.
(UNSPEC_WHILEHI, UNSPEC_WHILEHS): Likewise.
* config/aarch64/aarch64-sve2.md (@aarch64_gather_ldnt<mode>): New pattern.
(@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>)
(@aarch64_scatter_stnt<mode>): Likewise.
(@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>)
(@aarch64_mul_lane_<mode>): Likewise.
(@aarch64_sve_suqadd<mode>_const): Likewise.
(*<sur>h<addsub><mode>): Generalize to...
(@aarch64_pred_<SVE2_COND_INT_BINARY_REV:sve_int_op><mode>): ...this new pattern.
(@cond_<SVE2_COND_INT_BINARY:sve_int_op><mode>): New expander.
(*cond_<SVE2_COND_INT_BINARY:sve_int_op><mode>_2): New pattern.
(*cond_<SVE2_COND_INT_BINARY:sve_int_op><mode>_3): Likewise.
(*cond_<SVE2_COND_INT_BINARY:sve_int_op><mode>_any): Likewise.
(*cond_<SVE2_COND_INT_BINARY_NOREV:sve_int_op><mode>_z): Likewise.
(@aarch64_sve_<SVE2_INT_BINARY:sve_int_op><mode>): Likewise.
(@aarch64_sve_<SVE2_INT_BINARY:sve_int_op>_lane_<mode>): Likewise.
(@aarch64_pred_<SVE2_COND_INT_SHIFT:sve_int_op><mode>): Likewise.
(@cond_<SVE2_COND_INT_SHIFT:sve_int_op><mode>): New expander.
(*cond_<SVE2_COND_INT_SHIFT:sve_int_op><mode>_2): New pattern.
(*cond_<SVE2_COND_INT_SHIFT:sve_int_op><mode>_3): Likewise.
(*cond_<SVE2_COND_INT_SHIFT:sve_int_op><mode>_any): Likewise.
(@aarch64_sve_<SVE2_INT_TERNARY:sve_int_op><mode>): Likewise.
(@aarch64_sve_<SVE2_INT_TERNARY_LANE:sve_int_op>_lane_<mode>)
(@aarch64_sve_add_mul_lane_<mode>): Likewise.
(@aarch64_sve_sub_mul_lane_<mode>): Likewise.
(@aarch64_sve2_xar<mode>): Likewise.
(@aarch64_sve2_bcax<mode>): Likewise.
(*aarch64_sve2_eor3<mode>): Rename to...
(@aarch64_sve2_eor3<mode>): ...this.
(@aarch64_sve2_bsl<mode>): New expander.
(@aarch64_sve2_nbsl<mode>): Likewise.
(@aarch64_sve2_bsl1n<mode>): Likewise.
(@aarch64_sve2_bsl2n<mode>): Likewise.
(@aarch64_sve_add_<SHIFTRT:sve_int_op><mode>): Likewise.
(*aarch64_sve2_sra<mode>): Add MOVPRFX support.
(@aarch64_sve_add_<VRSHR_N:sve_int_op><mode>): New pattern.
(@aarch64_sve_<SVE2_INT_SHIFT_INSERT:sve_int_op><mode>): Likewise.
(@aarch64_sve2_<USMAX:su>aba<mode>): New expander.
(*aarch64_sve2_<USMAX:su>aba<mode>): New pattern.
(@aarch64_sve_<SVE2_INT_BINARY_WIDE:sve_int_op><mode>): Likewise.
(<su>mull<bt><Vwide>): Generalize to... (@aarch64_sve_<SVE2_INT_BINARY_LONG:sve_int_op><mode>): ...this new pattern. (@aarch64_sve_<SVE2_INT_BINARY_LONG_lANE:sve_int_op>_lane_<mode>) (@aarch64_sve_<SVE2_INT_SHIFT_IMM_LONG:sve_int_op><mode>) (@aarch64_sve_add_<SVE2_INT_ADD_BINARY_LONG:sve_int_op><mode>) (@aarch64_sve_add_<SVE2_INT_ADD_BINARY_LONG_LANE:sve_int_op>_lane_<mode>) (@aarch64_sve_qadd_<SVE2_INT_QADD_BINARY_LONG:sve_int_op><mode>) (@aarch64_sve_qadd_<SVE2_INT_QADD_BINARY_LONG_LANE:sve_int_op>_lane_<mode>) (@aarch64_sve_sub_<SVE2_INT_SUB_BINARY_LONG:sve_int_op><mode>) (@aarch64_sve_sub_<SVE2_INT_SUB_BINARY_LONG_LANE:sve_int_op>_lane_<mode>) (@aarch64_sve_qsub_<SVE2_INT_QSUB_BINARY_LONG:sve_int_op><mode>) (@aarch64_sve_qsub_<SVE2_INT_QSUB_BINARY_LONG_LANE:sve_int_op>_lane_<mode>) (@aarch64_sve_<SVE2_FP_TERNARY_LONG:sve_fp_op><mode>): New patterns. (@aarch64_<SVE2_FP_TERNARY_LONG_LANE:sve_fp_op>_lane_<mode>) (@aarch64_sve_<SVE2_INT_UNARY_NARROWB:sve_int_op><mode>): Likewise. (@aarch64_sve_<SVE2_INT_UNARY_NARROWT:sve_int_op><mode>): Likewise. (@aarch64_sve_<SVE2_INT_BINARY_NARROWB:sve_int_op><mode>): Likewise. (@aarch64_sve_<SVE2_INT_BINARY_NARROWT:sve_int_op><mode>): Likewise. (<SHRNB:r>shrnb<mode>): Generalize to... (@aarch64_sve_<SVE2_INT_SHIFT_IMM_NARROWB:sve_int_op><mode>): ...this new pattern. (<SHRNT:r>shrnt<mode>): Generalize to... (@aarch64_sve_<SVE2_INT_SHIFT_IMM_NARROWT:sve_int_op><mode>): ...this new pattern. (@aarch64_pred_<SVE2_INT_BINARY_PAIR:sve_int_op><mode>): New pattern. (@aarch64_pred_<SVE2_FP_BINARY_PAIR:sve_fp_op><mode>): Likewise. (@cond_<SVE2_INT_BINARY_PAIR_LONG:sve_int_op><mode>): New expander. (*cond_<SVE2_INT_BINARY_PAIR_LONG:sve_int_op><mode>_2): New pattern. (*cond_<SVE2_INT_BINARY_PAIR_LONG:sve_int_op><mode>_z): Likewise. (@aarch64_sve_<SVE2_INT_CADD:optab><mode>): Likewise. (@aarch64_sve_<SVE2_INT_CMLA:optab><mode>): Likewise. (@aarch64_<SVE2_INT_CMLA:optab>_lane_<mode>): Likewise. (@aarch64_sve_<SVE2_INT_CDOT:optab><mode>): Likewise. (@aarch64_<SVE2_INT_CDOT:optab>_lane_<mode>): Likewise. (@aarch64_pred_<SVE2_COND_FP_UNARY_LONG:sve_fp_op><mode>): Likewise. (@cond_<SVE2_COND_FP_UNARY_LONG:sve_fp_op><mode>): New expander. (*cond_<SVE2_COND_FP_UNARY_LONG:sve_fp_op><mode>): New pattern. (@aarch64_sve2_cvtnt<mode>): Likewise. (@aarch64_pred_<SVE2_COND_FP_UNARY_NARROWB:sve_fp_op><mode>): Likewise. (@cond_<SVE2_COND_FP_UNARY_NARROWB:sve_fp_op><mode>): New expander. (*cond_<SVE2_COND_FP_UNARY_NARROWB:sve_fp_op><mode>_any): New pattern. (@aarch64_sve2_cvtxnt<mode>): Likewise. (@aarch64_pred_<SVE2_U32_UNARY:sve_int_op><mode>): Likewise. (@cond_<SVE2_U32_UNARY:sve_int_op><mode>): New expander. (*cond_<SVE2_U32_UNARY:sve_int_op><mode>): New pattern. (@aarch64_pred_<SVE2_COND_INT_UNARY_FP:sve_fp_op><mode>): Likewise. (@cond_<SVE2_COND_INT_UNARY_FP:sve_fp_op><mode>): New expander. (*cond_<SVE2_COND_INT_UNARY_FP:sve_fp_op><mode>): New pattern. (@aarch64_sve2_pmul<mode>): Likewise. (@aarch64_sve_<SVE2_PMULL:optab><mode>): Likewise. (@aarch64_sve_<SVE2_PMULL_PAIR:optab><mode>): Likewise. (@aarch64_sve2_tbl2<mode>): Likewise. (@aarch64_sve2_tbx<mode>): Likewise. (@aarch64_sve_<SVE2_INT_BITPERM:sve_int_op><mode>): Likewise. (@aarch64_sve2_histcnt<mode>): Likewise. (@aarch64_sve2_histseg<mode>): Likewise. (@aarch64_pred_<SVE2_MATCH:sve_int_op><mode>): Likewise. (*aarch64_pred_<SVE2_MATCH:sve_int_op><mode>_cc): Likewise. (*aarch64_pred_<SVE2_MATCH:sve_int_op><mode>_ptest): Likewise. (aarch64_sve2_aes<CRYPTO_AES:aes_op>): Likewise. 
(aarch64_sve2_aes<CRYPTO_AESMC:aesmc_op>): Likewise. (*aarch64_sve2_aese_fused, *aarch64_sve2_aesd_fused): Likewise. (aarch64_sve2_rax1, aarch64_sve2_sm4e, aarch64_sve2_sm4ekey): Likewise. (<su>mulh<r>s<mode>3): Update after above pattern name changes. * config/aarch64/iterators.md (VNx16QI_ONLY, VNx4SF_ONLY) (SVE_STRUCT2, SVE_FULL_BHI, SVE_FULL_HSI, SVE_FULL_HDI) (SVE2_PMULL_PAIR_I): New mode iterators. (UNSPEC_ADCLB, UNSPEC_ADCLT, UNSPEC_ADDHNB, UNSPEC_ADDHNT, UNSPEC_BDEP) (UNSPEC_BEXT, UNSPEC_BGRP, UNSPEC_CADD90, UNSPEC_CADD270, UNSPEC_CDOT) (UNSPEC_CDOT90, UNSPEC_CDOT180, UNSPEC_CDOT270, UNSPEC_CMLA) (UNSPEC_CMLA90, UNSPEC_CMLA180, UNSPEC_CMLA270, UNSPEC_COND_FCVTLT) (UNSPEC_COND_FCVTNT, UNSPEC_COND_FCVTX, UNSPEC_COND_FCVTXNT) (UNSPEC_COND_FLOGB, UNSPEC_EORBT, UNSPEC_EORTB, UNSPEC_FADDP) (UNSPEC_FMAXP, UNSPEC_FMAXNMP, UNSPEC_FMLALB, UNSPEC_FMLALT) (UNSPEC_FMLSLB, UNSPEC_FMLSLT, UNSPEC_FMINP, UNSPEC_FMINNMP) (UNSPEC_HISTCNT, UNSPEC_HISTSEG, UNSPEC_MATCH, UNSPEC_NMATCH) (UNSPEC_PMULLB, UNSPEC_PMULLB_PAIR, UNSPEC_PMULLT, UNSPEC_PMULLT_PAIR) (UNSPEC_RADDHNB, UNSPEC_RADDHNT, UNSPEC_RSUBHNB, UNSPEC_RSUBHNT) (UNSPEC_SLI, UNSPEC_SRI, UNSPEC_SABDLB, UNSPEC_SABDLT, UNSPEC_SADDLB) (UNSPEC_SADDLBT, UNSPEC_SADDLT, UNSPEC_SADDWB, UNSPEC_SADDWT) (UNSPEC_SBCLB, UNSPEC_SBCLT, UNSPEC_SMAXP, UNSPEC_SMINP) (UNSPEC_SQCADD90, UNSPEC_SQCADD270, UNSPEC_SQDMULLB, UNSPEC_SQDMULLBT) (UNSPEC_SQDMULLT, UNSPEC_SQRDCMLAH, UNSPEC_SQRDCMLAH90) (UNSPEC_SQRDCMLAH180, UNSPEC_SQRDCMLAH270, UNSPEC_SQRSHRNB) (UNSPEC_SQRSHRNT, UNSPEC_SQRSHRUNB, UNSPEC_SQRSHRUNT, UNSPEC_SQSHRNB) (UNSPEC_SQSHRNT, UNSPEC_SQSHRUNB, UNSPEC_SQSHRUNT, UNSPEC_SQXTNB) (UNSPEC_SQXTNT, UNSPEC_SQXTUNB, UNSPEC_SQXTUNT, UNSPEC_SSHLLB) (UNSPEC_SSHLLT, UNSPEC_SSUBLB, UNSPEC_SSUBLBT, UNSPEC_SSUBLT) (UNSPEC_SSUBLTB, UNSPEC_SSUBWB, UNSPEC_SSUBWT, UNSPEC_SUBHNB) (UNSPEC_SUBHNT, UNSPEC_TBL2, UNSPEC_UABDLB, UNSPEC_UABDLT) (UNSPEC_UADDLB, UNSPEC_UADDLT, UNSPEC_UADDWB, UNSPEC_UADDWT) (UNSPEC_UMAXP, UNSPEC_UMINP, UNSPEC_UQRSHRNB, UNSPEC_UQRSHRNT) (UNSPEC_UQSHRNB, UNSPEC_UQSHRNT, UNSPEC_UQXTNB, UNSPEC_UQXTNT) (UNSPEC_USHLLB, UNSPEC_USHLLT, UNSPEC_USUBLB, UNSPEC_USUBLT) (UNSPEC_USUBWB, UNSPEC_USUBWT): New unspecs. (UNSPEC_SMULLB, UNSPEC_SMULLT, UNSPEC_UMULLB, UNSPEC_UMULLT) (UNSPEC_SMULHS, UNSPEC_SMULHRS, UNSPEC_UMULHS, UNSPEC_UMULHRS) (UNSPEC_RSHRNB, UNSPEC_RSHRNT, UNSPEC_SHRNB, UNSPEC_SHRNT): Move further down file. (VNARROW, Ventype): New mode attributes. (Vewtype): Handle VNx2DI. Fix typo in comment. (VDOUBLE): New mode attribute. (sve_lane_con): Handle VNx8HI. (SVE_INT_UNARY): Include ss_abs and ss_neg for TARGET_SVE2. (SVE_INT_BINARY): Likewise ss_plus, us_plus, ss_minus and us_minus. (sve_int_op, sve_int_op_rev): Handle the above codes. (sve_pred_int_rhs2_operand): Likewise. (MULLBT, SHRNB, SHRNT): Delete. (SVE_INT_SHIFT_IMM): New int iterator. (SVE_WHILE): Add UNSPEC_WHILEGE, UNSPEC_WHILEGT, UNSPEC_WHILEHI and UNSPEC_WHILEHS for TARGET_SVE2. (SVE2_U32_UNARY, SVE2_INT_UNARY_NARROWB, SVE2_INT_UNARY_NARROWT) (SVE2_INT_BINARY, SVE2_INT_BINARY_LANE, SVE2_INT_BINARY_LONG) (SVE2_INT_BINARY_LONG_LANE, SVE2_INT_BINARY_NARROWB) (SVE2_INT_BINARY_NARROWT, SVE2_INT_BINARY_PAIR, SVE2_FP_BINARY_PAIR) (SVE2_INT_BINARY_PAIR_LONG, SVE2_INT_BINARY_WIDE): New int iterators. (SVE2_INT_SHIFT_IMM_LONG, SVE2_INT_SHIFT_IMM_NARROWB): Likewise. (SVE2_INT_SHIFT_IMM_NARROWT, SVE2_INT_SHIFT_INSERT, SVE2_INT_CADD) (SVE2_INT_BITPERM, SVE2_INT_TERNARY, SVE2_INT_TERNARY_LANE): Likewise. 
(SVE2_FP_TERNARY_LONG, SVE2_FP_TERNARY_LONG_LANE, SVE2_INT_CMLA) (SVE2_INT_CDOT, SVE2_INT_ADD_BINARY_LONG, SVE2_INT_QADD_BINARY_LONG) (SVE2_INT_SUB_BINARY_LONG, SVE2_INT_QSUB_BINARY_LONG): Likewise. (SVE2_INT_ADD_BINARY_LONG_LANE, SVE2_INT_QADD_BINARY_LONG_LANE) (SVE2_INT_SUB_BINARY_LONG_LANE, SVE2_INT_QSUB_BINARY_LONG_LANE) (SVE2_COND_INT_UNARY_FP, SVE2_COND_FP_UNARY_LONG): Likewise. (SVE2_COND_FP_UNARY_NARROWB, SVE2_COND_INT_BINARY): Likewise. (SVE2_COND_INT_BINARY_NOREV, SVE2_COND_INT_BINARY_REV): Likewise. (SVE2_COND_INT_SHIFT, SVE2_MATCH, SVE2_PMULL): Likewise. (optab): Handle the new unspecs. (su, r): Remove entries for UNSPEC_SHRNB, UNSPEC_SHRNT, UNSPEC_RSHRNB and UNSPEC_RSHRNT. (lr): Handle the new unspecs. (bt): Delete. (cmp_op, while_optab_cmp, sve_int_op): Handle the new unspecs. (sve_int_op_rev, sve_int_add_op, sve_int_qadd_op, sve_int_sub_op) (sve_int_qsub_op): New int attributes. (sve_fp_op, rot): Handle the new unspecs. * config/aarch64/aarch64-sve-builtins.h (function_resolver::require_matching_pointer_type): Declare. (function_resolver::resolve_unary): Add an optional boolean argument. (function_resolver::finish_opt_n_resolution): Add an optional type_suffix_index argument. (gimple_folder::redirect_call): Declare. (gimple_expander::prepare_gather_address_operands): Add an optional bool parameter. * config/aarch64/aarch64-sve-builtins.cc: Include aarch64-sve-builtins-sve2.h. (TYPES_b_unsigned, TYPES_b_integer, TYPES_bh_integer): New macros. (TYPES_bs_unsigned, TYPES_hs_signed, TYPES_hs_integer): Likewise. (TYPES_hd_unsigned, TYPES_hsd_signed): Likewise. (TYPES_hsd_integer): Use TYPES_hsd_signed. (TYPES_s_float_hsd_integer, TYPES_s_float_sd_integer): New macros. (TYPES_s_unsigned): Likewise. (TYPES_s_integer): Use TYPES_s_unsigned. (TYPES_sd_signed, TYPES_sd_unsigned): New macros. (TYPES_sd_integer): Use them. (TYPES_d_unsigned): New macro. (TYPES_d_integer): Use it. (TYPES_d_data, TYPES_cvt_long, TYPES_cvt_narrow_s): New macros. (TYPES_cvt_narrow): Likewise. (DEF_SVE_TYPES_ARRAY): Include the new types macros above. (preds_mx): New variable. (function_builder::add_overloaded_function): Allow the new feature set to be more restrictive than the original one. (function_resolver::infer_pointer_type): Remove qualifiers from the pointer type before printing it. (function_resolver::require_matching_pointer_type): New function. (function_resolver::resolve_sv_displacement): Handle functions that don't support 32-bit vector indices or svint32_t vector offsets. (function_resolver::finish_opt_n_resolution): Take the inferred type as a separate argument. (function_resolver::resolve_unary): Optionally treat all forms in the same way as normal merging functions. (gimple_folder::redirect_call): New function. (function_expander::prepare_gather_address_operands): Add an argument that says whether scaled forms are available. If they aren't, handle scaling of vector indices and don't add the extension and scaling operands. (function_expander::map_to_unspecs): If aarch64_sve isn't available, fall back to using cond_* instead. * config/aarch64/aarch64-sve-builtins-functions.h (rtx_code_function): Split out the member variables into... (rtx_code_function_base): ...this new base class. (rtx_code_function_rotated): Inherit rtx_code_function_base. (unspec_based_function): Split out the member variables into... (unspec_based_function_base): ...this new base class. (unspec_based_function_rotated): Inherit unspec_based_function_base. (unspec_based_function_exact_insn): New class. 
(unspec_based_add_function, unspec_based_add_lane_function) (unspec_based_lane_function, unspec_based_pred_function) (unspec_based_qadd_function, unspec_based_qadd_lane_function) (unspec_based_qsub_function, unspec_based_qsub_lane_function) (unspec_based_sub_function, unspec_based_sub_lane_function): New typedefs. (unspec_based_fused_function): New class. (unspec_based_mla_function, unspec_based_mls_function): New typedefs. (unspec_based_fused_lane_function): New class. (unspec_based_mla_lane_function, unspec_based_mls_lane_function): New typedefs. (CODE_FOR_MODE1): New macro. (fixed_insn_function): New class. (while_comparison): Likewise. * config/aarch64/aarch64-sve-builtins-shapes.h (binary_long_lane) (binary_long_opt_n, binary_narrowb_opt_n, binary_narrowt_opt_n) (binary_to_uint, binary_wide, binary_wide_opt_n, compare, compare_ptr) (load_ext_gather_index_restricted, load_ext_gather_offset_restricted) (load_gather_sv_restricted, shift_left_imm_long): Declare. (shift_left_imm_to_uint, shift_right_imm_narrowb): Likewise. (shift_right_imm_narrowt, shift_right_imm_narrowb_to_uint): Likewise. (shift_right_imm_narrowt_to_uint, store_scatter_index_restricted) (store_scatter_offset_restricted, tbl_tuple, ternary_long_lane) (ternary_long_opt_n, ternary_qq_lane_rotate, ternary_qq_rotate) (ternary_shift_left_imm, ternary_shift_right_imm, ternary_uint) (unary_convert_narrowt, unary_long, unary_narrowb, unary_narrowt) (unary_narrowb_to_uint, unary_narrowt_to_uint, unary_to_int): Likewise. * config/aarch64/aarch64-sve-builtins-shapes.cc (apply_predication): Also add an initial argument for unary_convert_narrowt, regardless of the predication type. (build_32_64): Allow loads and stores to specify MODE_none. (build_sv_index64, build_sv_uint_offset): New functions. (long_type_suffix): New function. (binary_imm_narrowb_base, binary_imm_narrowt_base): New classes. (binary_imm_long_base, load_gather_sv_base): Likewise. (shift_right_imm_narrow_wrapper, ternary_shift_imm_base): Likewise. (ternary_resize2_opt_n_base, ternary_resize2_lane_base): Likewise. (unary_narrowb_base, unary_narrowt_base): Likewise. (binary_long_lane_def, binary_long_lane): New shape. (binary_long_opt_n_def, binary_long_opt_n): Likewise. (binary_narrowb_opt_n_def, binary_narrowb_opt_n): Likewise. (binary_narrowt_opt_n_def, binary_narrowt_opt_n): Likewise. (binary_to_uint_def, binary_to_uint): Likewise. (binary_wide_def, binary_wide): Likewise. (binary_wide_opt_n_def, binary_wide_opt_n): Likewise. (compare_def, compare): Likewise. (compare_ptr_def, compare_ptr): Likewise. (load_ext_gather_index_restricted_def, load_ext_gather_index_restricted): Likewise. (load_ext_gather_offset_restricted_def, load_ext_gather_offset_restricted): Likewise. (load_gather_sv_def): Inherit from load_gather_sv_base. (load_gather_sv_restricted_def, load_gather_sv_restricted): New shape. (shift_left_imm_def, shift_left_imm): Likewise. (shift_left_imm_long_def, shift_left_imm_long): Likewise. (shift_left_imm_to_uint_def, shift_left_imm_to_uint): Likewise. (store_scatter_index_restricted_def, store_scatter_index_restricted): Likewise. (store_scatter_offset_restricted_def, store_scatter_offset_restricted): Likewise. (tbl_tuple_def, tbl_tuple): Likewise. (ternary_long_lane_def, ternary_long_lane): Likewise. (ternary_long_opt_n_def, ternary_long_opt_n): Likewise. (ternary_qq_lane_def): Inherit from ternary_resize2_lane_base. (ternary_qq_lane_rotate_def, ternary_qq_lane_rotate): New shape (ternary_qq_opt_n_def): Inherit from ternary_resize2_opt_n_base. 
(ternary_qq_rotate_def, ternary_qq_rotate): New shape. (ternary_shift_left_imm_def, ternary_shift_left_imm): Likewise. (ternary_shift_right_imm_def, ternary_shift_right_imm): Likewise. (ternary_uint_def, ternary_uint): Likewise. (unary_convert): Fix typo in comment. (unary_convert_narrowt_def, unary_convert_narrowt): New shape. (unary_long_def, unary_long): Likewise. (unary_narrowb_def, unary_narrowb): Likewise. (unary_narrowt_def, unary_narrowt): Likewise. (unary_narrowb_to_uint_def, unary_narrowb_to_uint): Likewise. (unary_narrowt_to_uint_def, unary_narrowt_to_uint): Likewise. (unary_to_int_def, unary_to_int): Likewise. * config/aarch64/aarch64-sve-builtins-base.cc (unspec_cmla) (unspec_fcmla, unspec_cond_fcmla, expand_mla_mls_lane): New functions. (svasrd_impl): Delete. (svcadd_impl::expand): Handle integer operations too. (svcmla_impl::expand, svcmla_lane::expand): Likewise, using the new functions to derive the unspec numbers. (svmla_svmls_lane_impl): Replace with... (svmla_lane_impl, svmls_lane_impl): ...these new classes. Handle integer operations too. (svwhile_impl): Rename to... (svwhilelx_impl): ...this and inherit from while_comparison. (svasrd): Use unspec_based_function. (svmla_lane): Use svmla_lane_impl. (svmls_lane): Use svmls_lane_impl. (svrecpe, svrsqrte): Handle unsigned integer operations too. (svwhilele, svwhilelt): Use svwhilelx_impl. * config/aarch64/aarch64-sve-builtins-sve2.h: New file. * config/aarch64/aarch64-sve-builtins-sve2.cc: Likewise. * config/aarch64/aarch64-sve-builtins-sve2.def: Likewise. * config/aarch64/aarch64-sve-builtins.def: Include aarch64-sve-builtins-sve2.def. gcc/testsuite/ * g++.target/aarch64/sve/acle/general-c++/mul_lane_1.c: New test. * g++.target/aarch64/sve2/acle: New directory. * gcc.target/aarch64/pragma_cpp_predefs_3.c: New test. * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h (TEST_TYPE_CHANGE_Z) (TEST_DUAL_ZD, TEST_TYPE_CHANGE_ZX, TEST_TBL2, TEST_TBL2_REV): New macros. * gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c: Do not expect an error saying that the function has no f32 form, but instead expect an error about SVE2 being required if the current target doesn't support SVE2. * gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c: Likewise. * gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c Likewise. 
* gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c, * gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c, * gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c, * gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c, * gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/binary_wide_1.c, * gcc.target/aarch64/sve/acle/general-c/binary_wide_opt_n_1.c, * gcc.target/aarch64/sve/acle/general-c/compare_1.c, * gcc.target/aarch64/sve/acle/general-c/compare_ptr_1.c, * gcc.target/aarch64/sve/acle/general-c/load_ext_gather_index_restricted_1.c, * gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_restricted_1.c, * gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_restricted_2.c, * gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_restricted_3.c, * gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_restricted_4.c, * gcc.target/aarch64/sve/acle/general-c/load_gather_sv_restricted_1.c, * gcc.target/aarch64/sve/acle/general-c/load_gather_sv_restricted_2.c, * gcc.target/aarch64/sve/acle/general-c/mul_lane_1.c, * gcc.target/aarch64/sve/acle/general-c/shift_left_imm_long_1.c, * gcc.target/aarch64/sve/acle/general-c/shift_left_imm_to_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/shift_left_imm_to_uint_2.c, * gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_1.c, * gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_to_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_1.c, * gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_to_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/store_scatter_index_restricted_1.c, * gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_restricted_1.c, * gcc.target/aarch64/sve/acle/general-c/tbl_tuple_1.c, * gcc.target/aarch64/sve/acle/general-c/ternary_long_lane_1.c, * gcc.target/aarch64/sve/acle/general-c/ternary_long_opt_n_1.c, * gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_rotate_1.c, * gcc.target/aarch64/sve/acle/general-c/ternary_qq_rotate_1.c, * gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c, * gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/unary_convert_narrowt_1.c, * gcc.target/aarch64/sve/acle/general-c/unary_narrowb_1.c, * gcc.target/aarch64/sve/acle/general-c/unary_narrowb_to_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/unary_narrowt_1.c, * gcc.target/aarch64/sve/acle/general-c/unary_narrowt_to_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/unary_to_int_1.c: New tests. * gcc.target/aarch64/sve2/bcax_1.c: Likewise. * gcc.target/aarch64/sve2/acle: New directory. From-SVN: r280060
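As a usage-level illustration of the ACLE support described above (editorial sketch, not part of the commit): the intrinsic names below come from the SVE2 ACLE specification, the wrapper function is hypothetical, and the code assumes a compiler built with this patch and -march=armv8-a+sve2.

#include <arm_sve.h>

/* Two of the SVE2 operations wired up by this patch: halving add (SHADD)
   and integer multiply-accumulate by lane (the indexed MLA form).  */
svint32_t
sve2_acle_demo (svbool_t pg, svint32_t a, svint32_t b, svint32_t acc)
{
  svint32_t avg = svhadd_s32_m (pg, a, b);
  return svmla_lane_s32 (acc, avg, b, 1);
}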
Diffstat (limited to 'gcc/config/aarch64/aarch64-sve2.md')
-rw-r--r--  gcc/config/aarch64/aarch64-sve2.md  2129
1 file changed, 2086 insertions(+), 43 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 1b2b6b2..eaded5d 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -21,28 +21,191 @@
;; The file is organised into the following sections (search for the full
;; line):
;;
+;; == Moves
+;; ---- Non-temporal gather loads
+;; ---- Non-temporal scatter stores
+;;
;; == Uniform binary arithmetic
+;; ---- [INT] Multiplication
;; ---- [INT] Scaled high-part multiplication
;; ---- [INT] General binary arithmetic that maps to unspecs
+;; ---- [INT] Saturating binary arithmetic
+;; ---- [INT] Saturating left shifts
;;
;; == Uniform ternary arithmetic
+;; ---- [INT] General ternary arithmetic that maps to unspecs
+;; ---- [INT] Multiply-and-accumulate operations
+;; ---- [INT] Binary logic operations with rotation
;; ---- [INT] Ternary logic operations
;; ---- [INT] Shift-and-accumulate operations
+;; ---- [INT] Shift-and-insert operations
+;; ---- [INT] Sum of absolute differences
;;
;; == Extending arithmetic
+;; ---- [INT] Wide binary arithmetic
;; ---- [INT] Long binary arithmetic
+;; ---- [INT] Long left shifts
+;; ---- [INT] Long binary arithmetic with accumulation
+;; ---- [FP] Long multiplication with accumulation
;;
;; == Narrowing arithmetic
+;; ---- [INT] Narrowing unary arithmetic
+;; ---- [INT] Narrowing binary arithmetic
;; ---- [INT] Narrowing right shifts
;;
+;; == Pairwise arithmetic
+;; ---- [INT] Pairwise arithmetic
+;; ---- [FP] Pairwise arithmetic
+;; ---- [INT] Pairwise arithmetic with accumulation
+;;
+;; == Complex arithmetic
+;; ---- [INT] Complex binary operations
+;; ---- [INT] Complex ternary operations
+;; ---- [INT] Complex dot product
+;;
+;; == Conversions
+;; ---- [FP<-FP] Widening conversions
+;; ---- [FP<-FP] Narrowing conversions
+;;
+;; == Other arithmetic
+;; ---- [INT] Reciprocal approximation
+;; ---- [INT<-FP] Base-2 logarithm
+;; ---- [INT] Polynomial multiplication
+;;
+;; == Permutation
+;; ---- [INT,FP] General permutes
+;; ---- [INT] Optional bit-permute extensions
+;;
;; == General
;; ---- Check for aliases between pointers
+;; ---- Histogram processing
+;; ---- String matching
+;;
+;; == Cryptographic extensions
+;; ---- Optional AES extensions
+;; ---- Optional SHA-3 extensions
+;; ---- Optional SM4 extensions
+
+;; =========================================================================
+;; == Moves
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Non-temporal gather loads
+;; -------------------------------------------------------------------------
+;; Includes gather forms of:
+;; - LDNT1B
+;; - LDNT1D
+;; - LDNT1H
+;; - LDNT1W
+;; -------------------------------------------------------------------------
+
+;; Non-extending loads.
+(define_insn "@aarch64_gather_ldnt<mode>"
+ [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w, w")
+ (unspec:SVE_FULL_SD
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
+ (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")
+ (mem:BLK (scratch))]
+ UNSPEC_LDNT1_GATHER))]
+ "TARGET_SVE2"
+ "@
+ ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>]
+ ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2]"
+)
+
+;; Extending loads.
+(define_insn_and_rewrite "@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<SVE_FULL_SDI:VPRED> 4 "general_operand" "UplDnm, UplDnm")
+ (ANY_EXTEND:SVE_FULL_SDI
+ (unspec:SVE_PARTIAL_I
+ [(match_operand:<SVE_FULL_SDI:VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
+ (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 3 "register_operand" "w, w")
+ (mem:BLK (scratch))]
+ UNSPEC_LDNT1_GATHER))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE2
+ && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+ "@
+ ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>]
+ ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2]"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<SVE_FULL_SDI:VPRED>mode);
+ }
+)
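At the source level these patterns back the non-temporal gather intrinsics. A minimal sketch (intrinsic name per the SVE2 ACLE, wrapper function hypothetical, SVE2-enabled compiler assumed):

#include <arm_sve.h>

/* Non-temporal gather of 64-bit elements from a vector of addresses,
   i.e. the LDNT1D form matched by @aarch64_gather_ldnt<mode> above.  */
svint64_t
gather_nt (svbool_t pg, svuint64_t addresses)
{
  return svldnt1_gather_u64base_s64 (pg, addresses);
}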
+
+;; -------------------------------------------------------------------------
+;; ---- Non-temporal scatter stores
+;; -------------------------------------------------------------------------
+;; Includes scatter forms of:
+;; - STNT1B
+;; - STNT1D
+;; - STNT1H
+;; - STNT1W
+;; -------------------------------------------------------------------------
+
+;; Non-truncating stores.
+(define_insn "@aarch64_scatter_stnt<mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand:<VPRED> 0 "register_operand" "Upl, Upl")
+ (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_SD 3 "register_operand" "w, w")]
+
+ UNSPEC_STNT1_SCATTER))]
+ "TARGET_SVE"
+ "@
+ stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>]
+ stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>, %1]"
+)
+
+;; Truncating stores.
+(define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand:<SVE_FULL_SDI:VPRED> 0 "register_operand" "Upl, Upl")
+ (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
+ (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 2 "register_operand" "w, w")
+ (truncate:SVE_PARTIAL_I
+ (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))]
+ UNSPEC_STNT1_SCATTER))]
+ "TARGET_SVE2
+ && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+ "@
+ stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>]
+ stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>, %1]"
+)
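The corresponding intrinsic-level view of the scatter forms (name per the SVE2 ACLE, wrapper hypothetical):

#include <arm_sve.h>

/* Non-temporal scatter of 64-bit elements to a vector of addresses,
   i.e. the STNT1D form matched by @aarch64_scatter_stnt<mode> above.  */
void
scatter_nt (svbool_t pg, svuint64_t addresses, svint64_t data)
{
  svstnt1_scatter_u64base_s64 (pg, addresses, data);
}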
;; =========================================================================
;; == Uniform binary arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
+;; ---- [INT] Multiplication
+;; -------------------------------------------------------------------------
+;; Includes the lane forms of:
+;; - MUL
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_mul_lane_<mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
+ (mult:SVE_FULL_HSDI
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))]
+ "TARGET_SVE2"
+ "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
+)
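A sketch of the intrinsic this lane pattern serves (svmul_lane per the SVE2 ACLE, wrapper hypothetical):

#include <arm_sve.h>

/* MUL Z0.H, Z1.H, Z2.H[3]: multiply every element of A by lane 3 of B.  */
svint16_t
mul_by_lane3 (svint16_t a, svint16_t b)
{
  return svmul_lane_s16 (a, b, 3);
}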
+
+;; -------------------------------------------------------------------------
;; ---- [INT] Scaled high-part multiplication
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
@@ -64,12 +227,15 @@
rtx prod_b = gen_reg_rtx (<VWIDE>mode);
rtx prod_t = gen_reg_rtx (<VWIDE>mode);
- emit_insn (gen_<su>mullb<Vwide> (prod_b, operands[1], operands[2]));
- emit_insn (gen_<su>mullt<Vwide> (prod_t, operands[1], operands[2]));
+ emit_insn (gen_aarch64_sve_<su>mullb<Vwide> (prod_b, operands[1],
+ operands[2]));
+ emit_insn (gen_aarch64_sve_<su>mullt<Vwide> (prod_t, operands[1],
+ operands[2]));
rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
- emit_insn (gen_<r>shrnb<mode> (operands[0], prod_b, shift));
- emit_insn (gen_<r>shrnt<mode> (operands[0], operands[0], prod_t, shift));
+ emit_insn (gen_aarch64_sve_<r>shrnb<Vwide> (operands[0], prod_b, shift));
+ emit_insn (gen_aarch64_sve_<r>shrnt<Vwide> (operands[0], operands[0],
+ prod_t, shift));
DONE;
}
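Reading the expander above: each result lane is the double-width product narrowed by a right shift of element-size minus one. For 16-bit elements that corresponds roughly to the following scalar model (editorial sketch of the semantics, not code from the patch):

#include <stdint.h>

/* Scalar model of the synthesised signed "mulhs" on 16-bit elements:
   widen, multiply, shift right by 15.  The rounding variant adds
   (1 << 14) to the product before shifting.  */
static inline int16_t
smulhs16 (int16_t a, int16_t b)
{
  int32_t prod = (int32_t) a * (int32_t) b;
  return (int16_t) (prod >> 15);
}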
@@ -81,10 +247,22 @@
;; Includes:
;; - SHADD
;; - SHSUB
+;; - SHSUBR
+;; - SQRSHL
+;; - SQRSHLR
;; - SRHADD
+;; - SRSHL
+;; - SRSHLR
+;; - SUQADD
;; - UHADD
;; - UHSUB
+;; - UHSUBR
+;; - UQRSHL
+;; - UQRSHLR
;; - URHADD
+;; - URSHL
+;; - URSHLR
+;; - USQADD
;; -------------------------------------------------------------------------
;; Integer average (floor).
@@ -119,31 +297,485 @@
}
)
-;; Predicated halving addsub.
-(define_insn "*<sur>h<addsub><mode>"
+;; The immediate form of SQADD acts as an immediate form of SUQADD
+;; over its full range. In contrast to the ss_plus pattern, we do
+;; not need to treat byte immediates specially. E.g.:
+;;
+;; SQADD Z0.B, Z0.B, #128
+;;
+;; is equivalent to:
+;;
+;; MOV Z1.B, #128
+;; SUQADD Z0.B, P0/M, Z0.B, Z1.B
+;;
+;; even though it's not equivalent to:
+;;
+;; MOV Z1.B, #128
+;; SQADD Z0.B, P0/M, Z0.B, Z1.B // Saturating subtraction of 128
+(define_insn "@aarch64_sve_suqadd<mode>_const"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
+ (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")]
+ UNSPEC_SUQADD))]
+ "TARGET_SVE2"
+ "@
+ sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2
+ movprfx\t%0, %1\;sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2"
+ [(set_attr "movprfx" "*,yes")]
+)
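A worked s8 example of the comment above (editorial, scalar view): with a lane holding 100, adding the unsigned value 128 saturates upwards, so the result is min(100 + 128, 127) = 127, and the unpredicated SQADD #128 form behaves the same way. Predicated SQADD with a register whose lanes hold 0x80 instead adds the signed value -128, giving 100 - 128 = -28.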
+
+;; General predicated binary arithmetic. All operations handled here
+;; are commutative or have a reversed form.
+(define_insn "@aarch64_pred_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w")]
+ SVE2_COND_INT_BINARY_REV)]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
+)
+
+;; Predicated binary arithmetic with merging.
+(define_expand "@cond_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_dup 5)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand")
+ (match_operand:SVE_FULL_I 3 "register_operand")]
+ SVE2_COND_INT_BINARY)]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ {
+ operands[5] = CONSTM1_RTX (<MODE>mode);
+ }
+)
+
+;; Predicated binary arithmetic, merging with the first input.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand 4)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
+ SVE2_COND_INT_BINARY)]
+ UNSPEC_PRED_X)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated binary arithmetic, merging with the second input.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_I
- [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
- (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
- HADDSUB)]
+ [(match_operand 4)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
+ SVE2_COND_INT_BINARY_REV)]
+ UNSPEC_PRED_X)
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated binary operations, merging with an independent value.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand 5)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")]
+ SVE2_COND_INT_BINARY_REV)]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2
+ && !rtx_equal_p (operands[2], operands[4])
+ && !rtx_equal_p (operands[3], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& 1"
+ {
+ if (reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4]))
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ else if (!CONSTANT_P (operands[5]))
+ operands[5] = CONSTM1_RTX (<VPRED>mode);
+ else
+ FAIL;
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
+;; Predicated binary operations with no reverse form, merging with zero.
+;; At present we don't generate these patterns via a cond_* optab,
+;; so there's no correctness requirement to handle merging with an
+;; independent value.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand 5)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
+ SVE2_COND_INT_BINARY_NOREV)]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ "&& !CONSTANT_P (operands[5])"
+ {
+ operands[5] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Saturating binary arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SQDMULH
+;; - SQRDMULH
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand" "w")
+ (match_operand:SVE_FULL_I 2 "register_operand" "w")]
+ SVE2_INT_BINARY))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+)
+
+(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_BINARY_LANE))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
+)
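At the intrinsic level (names per the SVE2 ACLE, wrapper hypothetical):

#include <arm_sve.h>

/* Saturating doubling multiply high: vector form (SQDMULH) and the
   rounding lane form (SQRDMULH Z0.H, Z1.H, Z2.H[2]).  */
svint16_t
qdmulh_demo (svint16_t a, svint16_t b)
{
  svint16_t t = svqdmulh_s16 (a, b);
  return svqrdmulh_lane_s16 (t, b, 2);
}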
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Saturating left shifts
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SQSHL
+;; - SQSHLR
+;; - UQSHL
+;; - UQSHLR
+;; -------------------------------------------------------------------------
+
+;; Predicated left shifts.
+(define_insn "@aarch64_pred_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w")
+ (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w")]
+ SVE2_COND_INT_SHIFT)]
UNSPEC_PRED_X))]
"TARGET_SVE2"
"@
- <sur>h<addsub>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<sur>h<addsub>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,*,*,yes,yes")]
+)
+
+;; Predicated left shifts with merging.
+(define_expand "@cond_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_dup 5)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand")
+ (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
+ SVE2_COND_INT_SHIFT)]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 4 "register_operand")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ {
+ operands[5] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
+;; Predicated left shifts, merging with the first input.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand 4)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w")
+ (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, D<lr>, w")]
+ SVE2_COND_INT_SHIFT)]
+ UNSPEC_PRED_X)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,*,yes,yes")]
+)
+
+;; Predicated left shifts, merging with the second input.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand 4)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
+ SVE2_COND_INT_SHIFT)]
+ UNSPEC_PRED_X)
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
[(set_attr "movprfx" "*,yes")]
)
+;; Predicated left shifts, merging with an independent value.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, &w, &w, ?&w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand 5)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w, w, w, w, w")
+ (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w, D<lr>, w, D<lr>, w")]
+ SVE2_COND_INT_SHIFT)]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, Dz, 0, 0, w, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2
+ && !rtx_equal_p (operands[2], operands[4])
+ && (CONSTANT_P (operands[4]) || !rtx_equal_p (operands[3], operands[4]))"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #
+ #"
+ "&& 1"
+ {
+ if (reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4]))
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ else if (!CONSTANT_P (operands[5]))
+ operands[5] = CONSTM1_RTX (<VPRED>mode);
+ else
+ FAIL;
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
;; =========================================================================
;; == Uniform ternary arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
+;; ---- [INT] General ternary arithmetic that maps to unspecs
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADCLB
+;; - ADCLT
+;; - EORBT
+;; - EORTB
+;; - SBCLB
+;; - SBCLT
+;; - SQRDMLAH
+;; - SQRDMLSH
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w")]
+ SVE2_INT_TERNARY))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
+)
+
+(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")]
+ SVE2_INT_TERNARY_LANE))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multiply-and-accumulate operations
+;; -------------------------------------------------------------------------
+;; Includes the lane forms of:
+;; - MLA
+;; - MLS
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_add_mul_lane_<mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (plus:SVE_FULL_HSDI
+ (mult:SVE_FULL_HSDI
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
+ movprfx\t%0, %1\;mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+(define_insn "@aarch64_sve_sub_mul_lane_<mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (minus:SVE_FULL_HSDI
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
+ (mult:SVE_FULL_HSDI
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))))]
+ "TARGET_SVE2"
+ "@
+ mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
+ movprfx\t%0, %1\;mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Binary logic operations with rotation
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - XAR
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve2_xar<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (rotatert:SVE_FULL_I
+ (xor:SVE_FULL_I
+ (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
+ "TARGET_SVE2"
+ "@
+ xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
+ movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
+ [(set_attr "movprfx" "*,yes")]
+)
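Intrinsic-level sketch (svxar per the SVE2 ACLE, wrapper hypothetical):

#include <arm_sve.h>

/* XAR: per-lane rotate right of (a ^ b) by an immediate.  */
svint32_t
xor_rotate (svint32_t a, svint32_t b)
{
  return svxar_n_s32 (a, b, 7);
}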
+
+;; -------------------------------------------------------------------------
;; ---- [INT] Ternary logic operations
;; -------------------------------------------------------------------------
;; Includes:
+;; - BCAX
;; - BSL
;; - BSL1N
;; - BSL2N
@@ -151,8 +783,23 @@
;; - NBSL
;; -------------------------------------------------------------------------
+;; Unpredicated exclusive OR of AND.
+(define_insn "@aarch64_sve2_bcax<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (xor:SVE_FULL_I
+ (and:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ bcax\t%0.d, %0.d, %2.d, %3.d
+ movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Unpredicated 3-way exclusive OR.
-(define_insn "*aarch64_sve2_eor3<mode>"
+(define_insn "@aarch64_sve2_eor3<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
(xor:SVE_FULL_I
(xor:SVE_FULL_I
@@ -214,6 +861,18 @@
;; Unpredicated bitwise select.
;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
+(define_expand "@aarch64_sve2_bsl<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (xor:SVE_FULL_I
+ (and:SVE_FULL_I
+ (xor:SVE_FULL_I
+ (match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 2 "register_operand"))
+ (match_operand:SVE_FULL_I 3 "register_operand"))
+ (match_dup 2)))]
+ "TARGET_SVE2"
+)
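The select identity quoted in the comment above can be checked directly in scalar code (editorial sketch):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint32_t mov = 0x12345678, dup = 0x9abcdef0, sel = 0x0f0f0f0f;
  /* ((mov ^ dup) & sel) ^ dup picks mov bits where sel is 1 and
     dup bits where sel is 0, i.e. a bitwise select.  */
  assert ((((mov ^ dup) & sel) ^ dup) == ((mov & sel) | (dup & ~sel)));
  return 0;
}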
+
(define_insn "*aarch64_sve2_bsl<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(xor:SVE_FULL_I
@@ -232,6 +891,25 @@
;; Unpredicated bitwise inverted select.
;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
+(define_expand "@aarch64_sve2_nbsl<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_dup 4)
+ (not:SVE_FULL_I
+ (xor:SVE_FULL_I
+ (and:SVE_FULL_I
+ (xor:SVE_FULL_I
+ (match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 2 "register_operand"))
+ (match_operand:SVE_FULL_I 3 "register_operand"))
+ (match_dup 2)))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE2"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
@@ -258,6 +936,25 @@
;; Unpredicated bitwise select with inverted first operand.
;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
+(define_expand "@aarch64_sve2_bsl1n<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (xor:SVE_FULL_I
+ (and:SVE_FULL_I
+ (unspec:SVE_FULL_I
+ [(match_dup 4)
+ (not:SVE_FULL_I
+ (xor:SVE_FULL_I
+ (match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 2 "register_operand")))]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 3 "register_operand"))
+ (match_dup 2)))]
+ "TARGET_SVE2"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(xor:SVE_FULL_I
@@ -284,6 +981,26 @@
;; Unpredicated bitwise select with inverted second operand.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
+(define_expand "@aarch64_sve2_bsl2n<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (ior:SVE_FULL_I
+ (and:SVE_FULL_I
+ (match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 3 "register_operand"))
+ (unspec:SVE_FULL_I
+ [(match_dup 4)
+ (and:SVE_FULL_I
+ (not:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand"))
+ (not:SVE_FULL_I
+ (match_dup 3)))]
+ UNSPEC_PRED_X)))]
+ "TARGET_SVE2"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(ior:SVE_FULL_I
@@ -340,27 +1057,144 @@
;; ---- [INT] Shift-and-accumulate operations
;; -------------------------------------------------------------------------
;; Includes:
+;; - SRSRA
;; - SSRA
+;; - URSRA
;; - USRA
;; -------------------------------------------------------------------------
-;; Unpredicated signed / unsigned shift-right accumulate.
+;; Provide the natural unpredicated interface for SSRA and USRA.
+(define_expand "@aarch64_sve_add_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (plus:SVE_FULL_I
+ (unspec:SVE_FULL_I
+ [(match_dup 4)
+ (SHIFTRT:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand")
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 1 "register_operand")))]
+ "TARGET_SVE2"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
+;; Pattern-match SSRA and USRA as a predicated operation whose predicate
+;; isn't needed.
(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
- [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(plus:SVE_FULL_I
(unspec:SVE_FULL_I
[(match_operand 4)
(SHIFTRT:SVE_FULL_I
- (match_operand:SVE_FULL_I 2 "register_operand" "w")
- (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm" "Dr"))]
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
UNSPEC_PRED_X)
- (match_operand:SVE_FULL_I 1 "register_operand" "0")))]
+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
"TARGET_SVE2"
- "<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
+ "@
+ <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
+ movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; SRSRA and URSRA.
+(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (plus:SVE_FULL_I
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
+ VRSHR_N)
+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ <sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3
+ movprfx\t%0, %1\;<sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
+ [(set_attr "movprfx" "*,yes")]
+)
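Intrinsic-level sketch of the two accumulate forms (svsra/svrsra per the SVE2 ACLE, wrapper hypothetical):

#include <arm_sve.h>

/* Shift-right-and-accumulate: acc + (x >> 4), plain (SSRA) and
   rounding (SRSRA) forms.  */
svint32_t
sra_acc (svint32_t acc, svint32_t x)
{
  acc = svsra_n_s32 (acc, x, 4);
  return svrsra_n_s32 (acc, x, 4);
}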
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Shift-and-insert operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SLI
+;; - SRI
+;; -------------------------------------------------------------------------
+
+;; These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand" "0")
+ (match_operand:SVE_FULL_I 2 "register_operand" "w")
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
+ SVE2_INT_SHIFT_INSERT))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, #%3"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Sum of absolute differences
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SABA
+;; - UABA
+;; -------------------------------------------------------------------------
+
+;; Provide the natural unpredicated interface for SABA and UABA.
+(define_expand "@aarch64_sve2_<su>aba<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (plus:SVE_FULL_I
+ (minus:SVE_FULL_I
+ (unspec:SVE_FULL_I
+ [(match_dup 4)
+ (USMAX:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
+ UNSPEC_PRED_X)
+ (unspec:SVE_FULL_I
+ [(match_dup 4)
+ (<max_opp>:SVE_FULL_I
+ (match_dup 2)
+ (match_dup 3))]
+ UNSPEC_PRED_X))
+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
+;; Pattern-match SABA and UABA as an absolute-difference-and-accumulate
+;; operation whose predicates aren't needed.
+(define_insn "*aarch64_sve2_<su>aba<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (plus:SVE_FULL_I
+ (minus:SVE_FULL_I
+ (unspec:SVE_FULL_I
+ [(match_operand 4)
+ (USMAX:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
+ UNSPEC_PRED_X)
+ (unspec:SVE_FULL_I
+ [(match_operand 5)
+ (<max_opp>:SVE_FULL_I
+ (match_dup 2)
+ (match_dup 3))]
+ UNSPEC_PRED_X))
+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ <su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %1\;<su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
)
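+
+;; A hedged ACLE sketch of absolute-difference-and-accumulate (intrinsic
+;; assumed from arm_sve.h):
+;;
+;;   #include <arm_sve.h>
+;;   svuint8_t uaba_example (svuint8_t acc, svuint8_t a, svuint8_t b)
+;;   {
+;;     return svaba (acc, a, b);   /* acc + |a - b|, expected to emit UABA */
+;;   }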
;; =========================================================================
@@ -368,24 +1202,302 @@
;; =========================================================================
;; -------------------------------------------------------------------------
+;; ---- [INT] Wide binary arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SADDWB
+;; - SADDWT
+;; - SSUBWB
+;; - SSUBWT
+;; - UADDWB
+;; - UADDWT
+;; - USUBWB
+;; - USUBWT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
+ (match_operand:<VNARROW> 2 "register_operand" "w")]
+ SVE2_INT_BINARY_WIDE))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Ventype>"
+)
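+
+;; A hedged ACLE sketch of a wide binary operation (intrinsic assumed from
+;; arm_sve.h); the second operand supplies narrow elements that are widened
+;; on the fly:
+;;
+;;   #include <arm_sve.h>
+;;   svint16_t saddwb_example (svint16_t a, svint8_t b)
+;;   {
+;;     return svaddwb (a, b);   /* add the widened even bytes of b to a */
+;;   }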
+
+;; -------------------------------------------------------------------------
;; ---- [INT] Long binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
+;; - SABDLB
+;; - SABDLT
+;; - SADDLB
+;; - SADDLBT
+;; - SADDLT
;; - SMULLB
;; - SMULLT
+;; - SQDMULLB
+;; - SQDMULLT
+;; - SSUBLB
+;; - SSUBLBT
+;; - SSUBLT
+;; - SSUBLTB
+;; - UABDLB
+;; - UABDLT
+;; - UADDLB
+;; - UADDLT
;; - UMULLB
;; - UMULLT
+;; - USUBLB
+;; - USUBLT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VNARROW> 1 "register_operand" "w")
+ (match_operand:<VNARROW> 2 "register_operand" "w")]
+ SVE2_INT_BINARY_LONG))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
+)
+
+(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<VNARROW> 1 "register_operand" "w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_BINARY_LONG_LANE))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
+)
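+
+;; A hedged ACLE sketch of the long multiplies (intrinsics assumed from
+;; arm_sve.h), including the lane-select form handled by the pattern above:
+;;
+;;   #include <arm_sve.h>
+;;   svint32_t smullb_example (svint16_t a, svint16_t b)
+;;   {
+;;     return svmullb (a, b);           /* widen-multiply the even halfwords */
+;;   }
+;;   svint32_t smullt_lane_example (svint16_t a, svint16_t b)
+;;   {
+;;     return svmullt_lane (a, b, 2);   /* odd halfwords of a times element 2 of b */
+;;   }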
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Long left shifts
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SSHLLB
+;; - SSHLLT
+;; - USHLLB
+;; - USHLLT
+;; -------------------------------------------------------------------------
+
+;; The immediate range is enforced before generating the instruction.
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VNARROW> 1 "register_operand" "w")
+ (match_operand:DI 2 "const_int_operand")]
+ SVE2_INT_SHIFT_IMM_LONG))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, #%2"
+)
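+
+;; A hedged ACLE sketch of a long left shift (intrinsic assumed from
+;; arm_sve.h); the shift amount must be in the range checked before
+;; expansion:
+;;
+;;   #include <arm_sve.h>
+;;   svuint16_t ushllb_example (svuint8_t x)
+;;   {
+;;     return svshllb (x, 4);   /* widen the even bytes and shift left by 4 */
+;;   }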
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Long binary arithmetic with accumulation
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SABALB
+;; - SABALT
+;; - SMLALB
+;; - SMLALT
+;; - SMLSLB
+;; - SMLSLT
+;; - SQDMLALB
+;; - SQDMLALBT
+;; - SQDMLALT
+;; - SQDMLSLB
+;; - SQDMLSLBT
+;; - SQDMLSLT
+;; - UABALB
+;; - UABALT
+;; - UMLALB
+;; - UMLALT
+;; - UMLSLB
+;; - UMLSLT
+;; -------------------------------------------------------------------------
+
+;; Non-saturating MLA operations.
+(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (plus:SVE_FULL_HSDI
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (match_operand:<VNARROW> 3 "register_operand" "w, w")]
+ SVE2_INT_ADD_BINARY_LONG)
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
+ movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
+ [(set_attr "movprfx" "*,yes")]
+)
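+
+;; A hedged ACLE sketch of a long multiply-accumulate (intrinsic assumed
+;; from arm_sve.h):
+;;
+;;   #include <arm_sve.h>
+;;   svint32_t smlalb_example (svint32_t acc, svint16_t a, svint16_t b)
+;;   {
+;;     return svmlalb (acc, a, b);   /* acc + widened products of the even halfwords */
+;;   }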
+
+;; Non-saturating MLA operations with lane select.
+(define_insn "@aarch64_sve_add_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
+ (plus:SVE_FULL_SDI
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_ADD_BINARY_LONG_LANE)
+ (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
+ movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Saturating MLA operations.
+(define_insn "@aarch64_sve_qadd_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (ss_plus:SVE_FULL_HSDI
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (match_operand:<VNARROW> 3 "register_operand" "w, w")]
+ SVE2_INT_QADD_BINARY_LONG)
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
+ movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Saturating MLA operations with lane select.
+(define_insn "@aarch64_sve_qadd_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
+ (ss_plus:SVE_FULL_SDI
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_QADD_BINARY_LONG_LANE)
+ (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
+ movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Non-saturating MLS operations.
+(define_insn "@aarch64_sve_sub_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (minus:SVE_FULL_HSDI
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (match_operand:<VNARROW> 3 "register_operand" "w, w")]
+ SVE2_INT_SUB_BINARY_LONG)))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
+ movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Non-saturating MLS operations with lane select.
+(define_insn "@aarch64_sve_sub_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
+ (minus:SVE_FULL_SDI
+ (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_SUB_BINARY_LONG_LANE)))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
+ movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Saturating MLS operations.
+(define_insn "@aarch64_sve_qsub_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (ss_minus:SVE_FULL_HSDI
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (match_operand:<VNARROW> 3 "register_operand" "w, w")]
+ SVE2_INT_QSUB_BINARY_LONG)))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
+ movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Saturating MLS operations with lane select.
+(define_insn "@aarch64_sve_qsub_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
+ (ss_minus:SVE_FULL_SDI
+ (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_QSUB_BINARY_LONG_LANE)))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
+ movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
;; -------------------------------------------------------------------------
+;; ---- [FP] Long multiplication with accumulation
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMLALB
+;; - FMLALT
+;; - FMLSLB
+;; - FMLSLT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_fp_op><mode>"
+ [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
+ (unspec:VNx4SF_ONLY
+ [(match_operand:<VNARROW> 1 "register_operand" "w, w")
+ (match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (match_operand:VNx4SF_ONLY 3 "register_operand" "0, w")]
+ SVE2_FP_TERNARY_LONG))]
+ "TARGET_SVE2"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>
+ movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
+ [(set_attr "movprfx" "*,yes")]
+)
-;; Multiply long top / bottom.
-(define_insn "<su>mull<bt><Vwide>"
- [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (unspec:<VWIDE>
- [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
- (match_operand:SVE_FULL_BHSI 2 "register_operand" "w")]
- MULLBT))]
+(define_insn "@aarch64_<sve_fp_op>_lane_<mode>"
+ [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
+ (unspec:VNx4SF_ONLY
+ [(match_operand:<VNARROW> 1 "register_operand" "w, w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)
+ (match_operand:VNx4SF_ONLY 4 "register_operand" "0, w")]
+ SVE2_FP_TERNARY_LONG_LANE))]
"TARGET_SVE2"
- "<su>mull<bt>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]
+ movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
+ [(set_attr "movprfx" "*,yes")]
)
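+
+;; A hedged ACLE sketch of the floating-point long multiply-accumulate
+;; (intrinsic assumed from arm_sve.h); a float32 accumulator takes widened
+;; float16 products:
+;;
+;;   #include <arm_sve.h>
+;;   svfloat32_t fmlalb_example (svfloat32_t acc, svfloat16_t a, svfloat16_t b)
+;;   {
+;;     return svmlalb (acc, a, b);   /* expected to emit FMLALB */
+;;   }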
;; =========================================================================
@@ -393,6 +1505,74 @@
;; =========================================================================
;; -------------------------------------------------------------------------
+;; ---- [INT] Narrowing unary arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SQXTNB
+;; - SQXTNT
+;; - SQXTUNB
+;; - SQXTUNT
+;; - UQXTNB
+;; - UQXTNT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")]
+ SVE2_INT_UNARY_NARROWB))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>"
+)
+
+;; These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 1 "register_operand" "0")
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
+ SVE2_INT_UNARY_NARROWT))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>"
+)
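+
+;; A hedged ACLE sketch of the bottom/top narrowing pair (intrinsics assumed
+;; from arm_sve.h); the T form fills the odd elements left behind by the
+;; B form:
+;;
+;;   #include <arm_sve.h>
+;;   svint8_t sqxtn_example (svint16_t lo, svint16_t hi)
+;;   {
+;;     svint8_t n = svqxtnb (lo);   /* saturating narrow into the even bytes */
+;;     return svqxtnt (n, hi);      /* narrow hi into the odd bytes of n */
+;;   }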
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Narrowing binary arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADDHNB
+;; - ADDHNT
+;; - RADDHNB
+;; - RADDHNT
+;; - RSUBHNB
+;; - RSUBHNT
+;; - SUBHNB
+;; - SUBHNT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
+ SVE2_INT_BINARY_NARROWB))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
+)
+
+;; These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 1 "register_operand" "0")
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
+ (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")]
+ SVE2_INT_BINARY_NARROWT))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
+)
+
+;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing right shifts
;; -------------------------------------------------------------------------
;; Includes:
@@ -400,29 +1580,653 @@
;; - RSHRNT
;; - SHRNB
;; - SHRNT
+;; - SQRSHRNB
+;; - SQRSHRNT
+;; - SQRSHRUNB
+;; - SQRSHRUNT
+;; - SQSHRNB
+;; - SQSHRNT
+;; - SQSHRUNB
+;; - SQSHRUNT
+;; - UQRSHRNB
+;; - UQRSHRNT
+;; - UQSHRNB
+;; - UQSHRNT
;; -------------------------------------------------------------------------
-;; (Rounding) Right shift narrow bottom.
-(define_insn "<r>shrnb<mode>"
- [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
- (unspec:SVE_FULL_BHSI
- [(match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand 2 "aarch64_simd_shift_imm_offset_<Vel>" "")]
- SHRNB))]
+;; The immediate range is enforced before generating the instruction.
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
+ (match_operand:DI 2 "const_int_operand")]
+ SVE2_INT_SHIFT_IMM_NARROWB))]
"TARGET_SVE2"
- "<r>shrnb\t%0.<Vetype>, %1.<Vewtype>, #%2"
+ "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, #%2"
)
-;; (Rounding) Right shift narrow top.
-(define_insn "<r>shrnt<mode>"
- [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
- (unspec:SVE_FULL_BHSI
- [(match_operand:SVE_FULL_BHSI 1 "register_operand" "0")
- (match_operand:<VWIDE> 2 "register_operand" "w")
- (match_operand 3 "aarch64_simd_shift_imm_offset_<Vel>" "i")]
- SHRNT))]
+;; The immediate range is enforced before generating the instruction.
+;; These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 1 "register_operand" "0")
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
+ (match_operand:DI 3 "const_int_operand")]
+ SVE2_INT_SHIFT_IMM_NARROWT))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3"
+)
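+
+;; A hedged ACLE sketch of narrowing right shifts (intrinsics assumed from
+;; arm_sve.h):
+;;
+;;   #include <arm_sve.h>
+;;   svuint8_t shrnb_example (svuint16_t x)
+;;   {
+;;     return svshrnb (x, 8);      /* narrow x >> 8 into the even bytes */
+;;   }
+;;   svuint8_t sqshrunb_example (svint16_t x)
+;;   {
+;;     return svqshrunb (x, 4);    /* saturating unsigned narrow of x >> 4 */
+;;   }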
+
+;; =========================================================================
+;; == Pairwise arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Pairwise arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADDP
+;; - SMAXP
+;; - SMINP
+;; - UMAXP
+;; - UMINP
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_pred_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
+ SVE2_INT_BINARY_PAIR))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
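+
+;; A hedged ACLE sketch of a predicated pairwise operation (intrinsic and
+;; predication suffix assumed from arm_sve.h):
+;;
+;;   #include <arm_sve.h>
+;;   svuint32_t addp_example (svbool_t pg, svuint32_t a, svuint32_t b)
+;;   {
+;;     return svaddp_x (pg, a, b);   /* pairwise add of a and b */
+;;   }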
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Pairwise arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADDP
+;; - FMAXP
+;; - FMAXNMP
+;; - FMINP
+;; - FMINNMP
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_pred_<sve_fp_op><mode>"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
+ SVE2_FP_BINARY_PAIR))]
+ "TARGET_SVE2"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Pairwise arithmetic with accumulation
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SADALP
+;; - UADALP
+;; -------------------------------------------------------------------------
+
+;; Predicated pairwise add and accumulate long with merging.
+(define_expand "@cond_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_FULL_HSDI
+ [(match_dup 1)
+ (match_operand:SVE_FULL_HSDI 2 "register_operand")
+ (match_operand:<VNARROW> 3 "register_operand")]
+ SVE2_INT_BINARY_PAIR_LONG)
+ (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+{
+ /* Only target code is aware of these operations, so we don't need
+ to handle the fully-general case. */
+ gcc_assert (rtx_equal_p (operands[2], operands[4])
+ || CONSTANT_P (operands[4]));
+})
+
+;; Predicated pairwise add and accumulate long, merging with
+;; the first input.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand 4)
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
+ (match_operand:<VNARROW> 3 "register_operand" "w, w")]
+ SVE2_INT_BINARY_PAIR_LONG)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated pairwise add and accumulate long, merging with zero.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand 5)
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
+ (match_operand:<VNARROW> 3 "register_operand" "w, w")]
+ SVE2_INT_BINARY_PAIR_LONG)
+ (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_imm_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
+ "&& !CONSTANT_P (operands[5])"
+ {
+ operands[5] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "yes")]
+)
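+
+;; A hedged ACLE sketch of pairwise add-and-accumulate long (intrinsic
+;; assumed from arm_sve.h); the _m form merges with the accumulator, which
+;; is the case the patterns above handle:
+;;
+;;   #include <arm_sve.h>
+;;   svint32_t sadalp_example (svbool_t pg, svint32_t acc, svint16_t x)
+;;   {
+;;     return svadalp_m (pg, acc, x);   /* acc + pairwise sums of x */
+;;   }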
+
+;; =========================================================================
+;; == Complex arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Complex binary operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - CADD
+;; - SQCADD
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w")]
+ SVE2_INT_CADD))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>"
+ [(set_attr "movprfx" "*,yes")]
+)
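+
+;; A hedged ACLE sketch of a complex integer add (intrinsic assumed from
+;; arm_sve.h); the rotation immediate must be 90 or 270:
+;;
+;;   #include <arm_sve.h>
+;;   svint32_t cadd_example (svint32_t a, svint32_t b)
+;;   {
+;;     return svcadd (a, b, 90);   /* add b rotated by 90 degrees */
+;;   }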
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Complex ternary operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - CMLA
+;; - SQRDCMLA
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
+ SVE2_INT_CMLA))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+(define_insn "@aarch64_<optab>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_HSI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_HSI
+ [(match_operand:SVE_FULL_HSI 1 "register_operand" "0, w")
+ (match_operand:SVE_FULL_HSI 2 "register_operand" "w, w")
+ (unspec:SVE_FULL_HSI
+ [(match_operand:SVE_FULL_HSI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_CMLA))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Complex dot product
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - CDOT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
+ (match_operand:<VSI2QI> 2 "register_operand" "w, w")
+ (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
+ SVE2_INT_CDOT))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+(define_insn "@aarch64_<optab>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
+ (match_operand:<VSI2QI> 2 "register_operand" "w, w")
+ (unspec:<VSI2QI>
+ [(match_operand:<VSI2QI> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_CDOT))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>"
+ [(set_attr "movprfx" "*,yes")]
+)
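+
+;; A hedged ACLE sketch of the complex dot product (intrinsic assumed from
+;; arm_sve.h); each .s accumulator lane takes a dot product of .b complex
+;; pairs:
+;;
+;;   #include <arm_sve.h>
+;;   svint32_t cdot_example (svint32_t acc, svint8_t a, svint8_t b)
+;;   {
+;;     return svcdot (acc, a, b, 90);   /* rotation of 0, 90, 180 or 270 */
+;;   }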
+
+;; =========================================================================
+;; == Conversions
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Widening conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVTLT
+;; -------------------------------------------------------------------------
+
+;; Predicated convert long top.
+(define_insn "@aarch64_pred_<sve_fp_op><mode>"
+ [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
+ (unspec:SVE_FULL_SDF
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:<VNARROW> 2 "register_operand" "w")]
+ SVE2_COND_FP_UNARY_LONG))]
+ "TARGET_SVE2"
+ "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
+)
+
+;; Predicated convert long top with merging.
+(define_expand "@cond_<sve_fp_op><mode>"
+ [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
+ (unspec:SVE_FULL_SDF
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_FULL_SDF
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:<VNARROW> 2 "register_operand")]
+ SVE2_COND_FP_UNARY_LONG)
+ (match_operand:SVE_FULL_SDF 3 "register_operand")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+)
+
+;; These instructions do not take MOVPRFX.
+(define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
+ [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
+ (unspec:SVE_FULL_SDF
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (unspec:SVE_FULL_SDF
+ [(match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (match_operand:<VNARROW> 2 "register_operand" "w")]
+ SVE2_COND_FP_UNARY_LONG)
+ (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+)
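+
+;; A hedged ACLE sketch of the widening conversion (intrinsic name and _x
+;; suffix assumed from arm_sve.h); FCVTLT widens the odd (top) elements:
+;;
+;;   #include <arm_sve.h>
+;;   svfloat32_t cvtlt_example (svbool_t pg, svfloat16_t x)
+;;   {
+;;     return svcvtlt_f32_x (pg, x);
+;;   }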
+
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Narrowing conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVTNT
+;; - FCVTX
+;; - FCVTXNT
+;; -------------------------------------------------------------------------
+
+;; Predicated FCVTNT. This doesn't give a natural aarch64_pred_*/cond_*
+;; pair because the even elements always have to be supplied for active
+;; elements, even if the inactive elements don't matter.
+;;
+;; These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve2_cvtnt<mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (const_int SVE_STRICT_GP)
+ (match_operand:<VNARROW> 1 "register_operand" "0")
+ (match_operand:SVE_FULL_SDF 3 "register_operand" "w")]
+ UNSPEC_COND_FCVTNT))]
+ "TARGET_SVE2"
+ "fcvtnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
+)
+
+;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
+;; it supports MOVPRFX).
+(define_insn "@aarch64_pred_<sve_fp_op><mode>"
+ [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w")
+ (unspec:VNx4SF_ONLY
+ [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:<VWIDE> 2 "register_operand" "w")]
+ SVE2_COND_FP_UNARY_NARROWB))]
+ "TARGET_SVE2"
+ "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
+)
+
+;; Predicated FCVTX with merging.
+(define_expand "@cond_<sve_fp_op><mode>"
+ [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
+ (unspec:VNx4SF_ONLY
+ [(match_operand:<VWIDE_PRED> 1 "register_operand")
+ (unspec:VNx4SF_ONLY
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:<VWIDE> 2 "register_operand")]
+ SVE2_COND_FP_UNARY_NARROWB)
+ (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+)
+
+(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any"
+ [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
+ (unspec:VNx4SF_ONLY
+ [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:VNx4SF_ONLY
+ [(match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
+ SVE2_COND_FP_UNARY_NARROWB)
+ (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2
+ && !rtx_equal_p (operands[2], operands[3])
+ && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
+ movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
+ movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+ [(set_attr "movprfx" "*,yes,yes")]
+)
+
+;; Predicated FCVTXNT. This doesn't give a natural aarch64_pred_*/cond_*
+;; pair because the even elements always have to be supplied for active
+;; elements, even if the inactive elements don't matter.
+;;
+;; These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve2_cvtxnt<mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (const_int SVE_STRICT_GP)
+ (match_operand:<VNARROW> 1 "register_operand" "0")
+ (match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
+ UNSPEC_COND_FCVTXNT))]
+ "TARGET_SVE2"
+ "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
+)
+
+;; =========================================================================
+;; == Other arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Reciprocal approximation
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - URECPE
+;; - URSQRTE
+;; -------------------------------------------------------------------------
+
+;; Predicated integer unary operations.
+(define_insn "@aarch64_pred_<sve_int_op><mode>"
+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w")
+ (unspec:VNx4SI_ONLY
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (unspec:VNx4SI_ONLY
+ [(match_operand:VNx4SI_ONLY 2 "register_operand" "w")]
+ SVE2_U32_UNARY)]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+)
+
+;; Predicated integer unary operations with merging.
+(define_expand "@cond_<sve_int_op><mode>"
+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
+ (unspec:VNx4SI_ONLY
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:VNx4SI_ONLY
+ [(match_dup 4)
+ (unspec:VNx4SI_ONLY
+ [(match_operand:VNx4SI_ONLY 2 "register_operand")]
+ SVE2_U32_UNARY)]
+ UNSPEC_PRED_X)
+ (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ {
+ operands[4] = CONSTM1_RTX (<MODE>mode);
+ }
+)
+
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>"
+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w, ?&w")
+ (unspec:VNx4SI_ONLY
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:VNx4SI_ONLY
+ [(match_operand 4)
+ (unspec:VNx4SI_ONLY
+ [(match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
+ SVE2_U32_UNARY)]
+ UNSPEC_PRED_X)
+ (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes,yes")]
+)
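+
+;; A hedged ACLE sketch of the unsigned reciprocal estimate (intrinsic and
+;; predication suffix assumed from arm_sve.h):
+;;
+;;   #include <arm_sve.h>
+;;   svuint32_t urecpe_example (svbool_t pg, svuint32_t x)
+;;   {
+;;     return svrecpe_x (pg, x);   /* URECPE on the active elements */
+;;   }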
+
+;; -------------------------------------------------------------------------
+;; ---- [INT<-FP] Base-2 logarithm
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FLOGB
+;; -------------------------------------------------------------------------
+
+;; Predicated FLOGB.
+(define_insn "@aarch64_pred_<sve_fp_op><mode>"
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
+ (unspec:<V_INT_EQUIV>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+ SVE2_COND_INT_UNARY_FP))]
+ "TARGET_SVE2"
+ "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+)
+
+;; Predicated FLOGB with merging.
+(define_expand "@cond_<sve_fp_op><mode>"
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
+ (unspec:<V_INT_EQUIV>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:<V_INT_EQUIV>
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand")]
+ SVE2_COND_INT_UNARY_FP)
+ (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+)
+
+(define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
+ (unspec:<V_INT_EQUIV>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:<V_INT_EQUIV>
+ [(match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
+ SVE2_COND_INT_UNARY_FP)
+ (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2
+ && !rtx_equal_p (operands[2], operands[3])
+ && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+ [(set_attr "movprfx" "*,yes,yes")]
+)
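+
+;; A hedged ACLE sketch of FLOGB (intrinsic and predication suffix assumed
+;; from arm_sve.h); the result is the integer exponent of each element:
+;;
+;;   #include <arm_sve.h>
+;;   svint32_t flogb_example (svbool_t pg, svfloat32_t x)
+;;   {
+;;     return svlogb_x (pg, x);
+;;   }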
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Polynomial multiplication
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - PMUL
+;; - PMULLB
+;; - PMULLT
+;; -------------------------------------------------------------------------
+
+;; Uniform PMUL.
+(define_insn "@aarch64_sve2_pmul<mode>"
+ [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
+ (unspec:VNx16QI_ONLY
+ [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
+ (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
+ UNSPEC_PMUL))]
+ "TARGET_SVE2"
+ "pmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+)
+
+;; Extending PMUL, with the results modeled as wider vectors.
+;; This representation is only possible for .H and .D, not .Q.
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_FULL_HDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_HDI
+ [(match_operand:<VNARROW> 1 "register_operand" "w")
+ (match_operand:<VNARROW> 2 "register_operand" "w")]
+ SVE2_PMULL))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
+)
+
+;; Extending PMUL, with the results modeled as pairs of values.
+;; This representation works for .H, .D and .Q, with .Q requiring
+;; the AES extension. (This is enforced by the mode iterator.)
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE2_PMULL_PAIR_I 0 "register_operand" "=w")
+ (unspec:SVE2_PMULL_PAIR_I
+ [(match_operand:SVE2_PMULL_PAIR_I 1 "register_operand" "w")
+ (match_operand:SVE2_PMULL_PAIR_I 2 "register_operand" "w")]
+ SVE2_PMULL_PAIR))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
+)
+
+;; =========================================================================
+;; == Permutation
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] General permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - TBL (vector pair form)
+;; - TBX
+;; -------------------------------------------------------------------------
+
+;; TBL on a pair of data vectors.
+(define_insn "@aarch64_sve2_tbl2<mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
+ (unspec:SVE_FULL
+ [(match_operand:<VDOUBLE> 1 "register_operand" "w")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
+ UNSPEC_TBL2))]
"TARGET_SVE2"
- "<r>shrnt\t%0.<Vetype>, %2.<Vewtype>, #%3"
+ "tbl\t%0.<Vetype>, %1, %2.<Vetype>"
+)
+
+;; TBX. These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve2_tbx<mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
+ (unspec:SVE_FULL
+ [(match_operand:SVE_FULL 1 "register_operand" "0")
+ (match_operand:SVE_FULL 2 "register_operand" "w")
+ (match_operand:<V_INT_EQUIV> 3 "register_operand" "w")]
+ UNSPEC_TBX))]
+ "TARGET_SVE2"
+ "tbx\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
+)
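+
+;; A hedged ACLE sketch of the two-vector table lookups (intrinsics assumed
+;; from arm_sve.h); TBX leaves out-of-range lanes at their fallback value:
+;;
+;;   #include <arm_sve.h>
+;;   svfloat32_t tbl2_example (svfloat32x2_t table, svuint32_t idx)
+;;   {
+;;     return svtbl2 (table, idx);
+;;   }
+;;   svfloat32_t tbx_example (svfloat32_t fallback, svfloat32_t data, svuint32_t idx)
+;;   {
+;;     return svtbx (fallback, data, idx);
+;;   }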
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Optional bit-permute extensions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BDEP
+;; - BEXT
+;; - BGRP
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand" "w")
+ (match_operand:SVE_FULL_I 2 "register_operand" "w")]
+ SVE2_INT_BITPERM))]
+ "TARGET_SVE2_BITPERM"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
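+
+;; A hedged ACLE sketch of the bit-permute extension (intrinsic assumed from
+;; arm_sve.h, requiring +sve2-bitperm):
+;;
+;;   #include <arm_sve.h>
+;;   svuint32_t bdep_example (svuint32_t data, svuint32_t mask)
+;;   {
+;;     return svbdep (data, mask);   /* scatter data bits into the set bits of mask */
+;;   }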
;; =========================================================================
@@ -469,3 +2273,242 @@
emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- Histogram processing
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - HISTCNT
+;; - HISTSEG
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve2_histcnt<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
+ (match_operand:SVE_FULL_SDI 3 "register_operand" "w")]
+ UNSPEC_HISTCNT))]
+ "TARGET_SVE2"
+ "histcnt\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+)
+
+(define_insn "@aarch64_sve2_histseg<mode>"
+ [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
+ (unspec:VNx16QI_ONLY
+ [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
+ (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
+ UNSPEC_HISTSEG))]
+ "TARGET_SVE2"
+ "histseg\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- String matching
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MATCH
+;; - NMATCH
+;; -------------------------------------------------------------------------
+
+;; Predicated string matching.
+(define_insn "@aarch64_pred_<sve_int_op><mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_BHI 3 "register_operand" "w")
+ (match_operand:SVE_FULL_BHI 4 "register_operand" "w")]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
+)
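+
+;; A hedged ACLE sketch of string matching (intrinsic assumed from
+;; arm_sve.h); the result predicate marks active lanes of the first operand
+;; that occur in the corresponding 128-bit segment of the second:
+;;
+;;   #include <arm_sve.h>
+;;   svbool_t match_example (svbool_t pg, svuint8_t haystack, svuint8_t needles)
+;;   {
+;;     return svmatch (pg, haystack, needles);
+;;   }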
+
+;; Predicated string matching in which both the flag and predicate results
+;; are interesting.
+(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand" "Upl")
+ (match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand 6)
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
+ (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z)]
+ UNSPEC_PTEST))
+ (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+ (unspec:<VPRED>
+ [(match_dup 6)
+ (match_dup 7)
+ (unspec:<VPRED>
+ [(match_dup 2)
+ (match_dup 3)]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z))]
+ "TARGET_SVE2
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+ "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
+)
+
+;; Predicated string matching in which only the flags result is interesting.
+(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_ptest"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand" "Upl")
+ (match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand 6)
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
+ (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z)]
+ UNSPEC_PTEST))
+ (clobber (match_scratch:<VPRED> 0 "=Upa"))]
+ "TARGET_SVE2
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+ "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
+)
+
+;; =========================================================================
+;; == Cryptographic extensions
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Optional AES extensions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - AESD
+;; - AESE
+;; - AESIMC
+;; - AESMC
+;; -------------------------------------------------------------------------
+
+;; AESD and AESE.
+(define_insn "aarch64_sve2_aes<aes_op>"
+ [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+ (unspec:VNx16QI
+ [(xor:VNx16QI
+ (match_operand:VNx16QI 1 "register_operand" "%0")
+ (match_operand:VNx16QI 2 "register_operand" "w"))]
+ CRYPTO_AES))]
+ "TARGET_SVE2_AES"
+ "aes<aes_op>\t%0.b, %0.b, %2.b"
+ [(set_attr "type" "crypto_aese")]
+)
+
+;; AESMC and AESIMC. These instructions do not take MOVPRFX.
+(define_insn "aarch64_sve2_aes<aesmc_op>"
+ [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+ (unspec:VNx16QI
+ [(match_operand:VNx16QI 1 "register_operand" "0")]
+ CRYPTO_AESMC))]
+ "TARGET_SVE2_AES"
+ "aes<aesmc_op>\t%0.b, %0.b"
+ [(set_attr "type" "crypto_aesmc")]
+)
+
+;; When AESE/AESMC and AESD/AESIMC fusion is enabled, we really want
+;; to keep the two together and enforce the register dependency without
+;; scheduling or register allocation messing up the order or introducing
+;; moves in between. Mash the two together during combine.
+
+(define_insn "*aarch64_sve2_aese_fused"
+ [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+ (unspec:VNx16QI
+ [(unspec:VNx16QI
+ [(xor:VNx16QI
+ (match_operand:VNx16QI 1 "register_operand" "%0")
+ (match_operand:VNx16QI 2 "register_operand" "w"))]
+ UNSPEC_AESE)]
+ UNSPEC_AESMC))]
+ "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
+ "aese\t%0.b, %0.b, %2.b\;aesmc\t%0.b, %0.b"
+ [(set_attr "type" "crypto_aese")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*aarch64_sve2_aesd_fused"
+ [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+ (unspec:VNx16QI
+ [(unspec:VNx16QI
+ [(xor:VNx16QI
+ (match_operand:VNx16QI 1 "register_operand" "%0")
+ (match_operand:VNx16QI 2 "register_operand" "w"))]
+ UNSPEC_AESD)]
+ UNSPEC_AESIMC))]
+ "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
+ "aesd\t%0.b, %0.b, %2.b\;aesimc\t%0.b, %0.b"
+ [(set_attr "type" "crypto_aese")
+ (set_attr "length" "8")]
+)
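+
+;; A hedged ACLE sketch (intrinsics assumed from arm_sve.h, requiring
+;; +sve2-aes) of the kind of back-to-back sequence the fused patterns above
+;; are meant to match:
+;;
+;;   #include <arm_sve.h>
+;;   svuint8_t aes_round_example (svuint8_t state, svuint8_t key)
+;;   {
+;;     return svaesmc (svaese (state, key));
+;;   }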
+
+;; -------------------------------------------------------------------------
+;; ---- Optional SHA-3 extensions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - RAX1
+;; -------------------------------------------------------------------------
+
+(define_insn "aarch64_sve2_rax1"
+ [(set (match_operand:VNx2DI 0 "register_operand" "=w")
+ (xor:VNx2DI
+ (rotate:VNx2DI
+ (match_operand:VNx2DI 2 "register_operand" "w")
+ (const_int 1))
+ (match_operand:VNx2DI 1 "register_operand" "w")))]
+ "TARGET_SVE2_SHA3"
+ "rax1\t%0.d, %1.d, %2.d"
+ [(set_attr "type" "crypto_sha3")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- Optional SM4 extensions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SM4E
+;; - SM4EKEY
+;; -------------------------------------------------------------------------
+
+;; These instructions do not take MOVPRFX.
+(define_insn "aarch64_sve2_sm4e"
+ [(set (match_operand:VNx4SI 0 "register_operand" "=w")
+ (unspec:VNx4SI
+ [(match_operand:VNx4SI 1 "register_operand" "0")
+ (match_operand:VNx4SI 2 "register_operand" "w")]
+ UNSPEC_SM4E))]
+ "TARGET_SVE2_SM4"
+ "sm4e\t%0.s, %0.s, %2.s"
+ [(set_attr "type" "crypto_sm4")]
+)
+
+(define_insn "aarch64_sve2_sm4ekey"
+ [(set (match_operand:VNx4SI 0 "register_operand" "=w")
+ (unspec:VNx4SI
+ [(match_operand:VNx4SI 1 "register_operand" "w")
+ (match_operand:VNx4SI 2 "register_operand" "w")]
+ UNSPEC_SM4EKEY))]
+ "TARGET_SVE2_SM4"
+ "sm4ekey\t%0.s, %1.s, %2.s"
+ [(set_attr "type" "crypto_sm4")]
+)