author    Richard Sandiford <richard.sandiford@arm.com>  2020-01-09 16:36:42 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org>       2020-01-09 16:36:42 +0000
commit    0a09a9483825233f16e5b26bb0ffee76752339fc
tree      fe83aaee3f4299895706f7e0e40af7ebc6ab3d6d
parent    f3582fda783496cc268467973c2c9860cd159b3d
[AArch64] Add support for the SVE2 ACLE
This patch adds support for the SVE2 ACLE. The implementation and tests follow the same pattern as the existing SVE ACLE support.
2020-01-09 Richard Sandiford <richard.sandiford@arm.com>
gcc/
* config.gcc (aarch64*-*-*): Add aarch64-sve-builtins-sve2.o to extra_objs. * config/aarch64/t-aarch64 (aarch64-sve-builtins.o): Depend on aarch64-sve-builtins-base.def, aarch64-sve-builtins-sve2.def and aarch64-sve-builtins-sve2.h. (aarch64-sve-builtins-sve2.o): New rule. * config/aarch64/aarch64.h (AARCH64_ISA_SVE2_AES): New macro. (AARCH64_ISA_SVE2_BITPERM, AARCH64_ISA_SVE2_SHA3): Likewise. (AARCH64_ISA_SVE2_SM4, TARGET_SVE2_AES, TARGET_SVE2_BITPERM): Likewise. (TARGET_SVE2_SHA3, TARGET_SVE2_SM4): Likewise. * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Handle TARGET_SVE2_AES, TARGET_SVE2_BITPERM, TARGET_SVE2_SHA3 and TARGET_SVE2_SM4. * config/aarch64/aarch64-sve.md: Update comments with SVE2 instructions that are handled here. (@cond_asrd<mode>): Generalize to... (@cond_<SVE_INT_SHIFT_IMM:sve_int_op><mode>): ...this. (*cond_asrd<mode>_2): Generalize to... (*cond_<SVE_INT_SHIFT_IMM:sve_int_op><mode>_2): ...this. (*cond_asrd<mode>_z): Generalize to... (*cond_<SVE_INT_SHIFT_IMM:sve_int_op><mode>_z): ...this. * config/aarch64/aarch64.md (UNSPEC_LDNT1_GATHER): New unspec. (UNSPEC_STNT1_SCATTER, UNSPEC_WHILEGE, UNSPEC_WHILEGT): Likewise. (UNSPEC_WHILEHI, UNSPEC_WHILEHS): Likewise. * config/aarch64/aarch64-sve2.md (@aarch64_gather_ldnt<mode>): New pattern. (@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>) (@aarch64_scatter_stnt<mode>): Likewise. (@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>) (@aarch64_mul_lane_<mode>): Likewise. (@aarch64_sve_suqadd<mode>_const): Likewise. (*<sur>h<addsub><mode>): Generalize to... (@aarch64_pred_<SVE2_COND_INT_BINARY_REV:sve_int_op><mode>): ...this new pattern. (@cond_<SVE2_COND_INT_BINARY:sve_int_op><mode>): New expander. (*cond_<SVE2_COND_INT_BINARY:sve_int_op><mode>_2): New pattern. (*cond_<SVE2_COND_INT_BINARY:sve_int_op><mode>_3): Likewise. (*cond_<SVE2_COND_INT_BINARY:sve_int_op><mode>_any): Likewise. (*cond_<SVE2_COND_INT_BINARY_NOREV:sve_int_op><mode>_z): Likewise. (@aarch64_sve_<SVE2_INT_BINARY:sve_int_op><mode>): Likewise. (@aarch64_sve_<SVE2_INT_BINARY:sve_int_op>_lane_<mode>): Likewise. (@aarch64_pred_<SVE2_COND_INT_SHIFT:sve_int_op><mode>): Likewise. (@cond_<SVE2_COND_INT_SHIFT:sve_int_op><mode>): New expander. (*cond_<SVE2_COND_INT_SHIFT:sve_int_op><mode>_2): New pattern. (*cond_<SVE2_COND_INT_SHIFT:sve_int_op><mode>_3): Likewise. (*cond_<SVE2_COND_INT_SHIFT:sve_int_op><mode>_any): Likewise. (@aarch64_sve_<SVE2_INT_TERNARY:sve_int_op><mode>): Likewise. (@aarch64_sve_<SVE2_INT_TERNARY_LANE:sve_int_op>_lane_<mode>) (@aarch64_sve_add_mul_lane_<mode>): Likewise. (@aarch64_sve_sub_mul_lane_<mode>): Likewise. (@aarch64_sve2_xar<mode>): Likewise. (@aarch64_sve2_bcax<mode>): Likewise. (*aarch64_sve2_eor3<mode>): Rename to... (@aarch64_sve2_eor3<mode>): ...this. (@aarch64_sve2_bsl<mode>): New expander. (@aarch64_sve2_nbsl<mode>): Likewise. (@aarch64_sve2_bsl1n<mode>): Likewise. (@aarch64_sve2_bsl2n<mode>): Likewise. (@aarch64_sve_add_<SHIFTRT:sve_int_op><mode>): Likewise. (*aarch64_sve2_sra<mode>): Add MOVPRFX support. (@aarch64_sve_add_<VRSHR_N:sve_int_op><mode>): New pattern. (@aarch64_sve_<SVE2_INT_SHIFT_INSERT:sve_int_op><mode>): Likewise. (@aarch64_sve2_<USMAX:su>aba<mode>): New expander. (*aarch64_sve2_<USMAX:su>aba<mode>): New pattern. (@aarch64_sve_<SVE2_INT_BINARY_WIDE:sve_int_op><mode>): Likewise. 
(<su>mull<bt><Vwide>): Generalize to... (@aarch64_sve_<SVE2_INT_BINARY_LONG:sve_int_op><mode>): ...this new pattern. (@aarch64_sve_<SVE2_INT_BINARY_LONG_lANE:sve_int_op>_lane_<mode>) (@aarch64_sve_<SVE2_INT_SHIFT_IMM_LONG:sve_int_op><mode>) (@aarch64_sve_add_<SVE2_INT_ADD_BINARY_LONG:sve_int_op><mode>) (@aarch64_sve_add_<SVE2_INT_ADD_BINARY_LONG_LANE:sve_int_op>_lane_<mode>) (@aarch64_sve_qadd_<SVE2_INT_QADD_BINARY_LONG:sve_int_op><mode>) (@aarch64_sve_qadd_<SVE2_INT_QADD_BINARY_LONG_LANE:sve_int_op>_lane_<mode>) (@aarch64_sve_sub_<SVE2_INT_SUB_BINARY_LONG:sve_int_op><mode>) (@aarch64_sve_sub_<SVE2_INT_SUB_BINARY_LONG_LANE:sve_int_op>_lane_<mode>) (@aarch64_sve_qsub_<SVE2_INT_QSUB_BINARY_LONG:sve_int_op><mode>) (@aarch64_sve_qsub_<SVE2_INT_QSUB_BINARY_LONG_LANE:sve_int_op>_lane_<mode>) (@aarch64_sve_<SVE2_FP_TERNARY_LONG:sve_fp_op><mode>): New patterns. (@aarch64_<SVE2_FP_TERNARY_LONG_LANE:sve_fp_op>_lane_<mode>) (@aarch64_sve_<SVE2_INT_UNARY_NARROWB:sve_int_op><mode>): Likewise. (@aarch64_sve_<SVE2_INT_UNARY_NARROWT:sve_int_op><mode>): Likewise. (@aarch64_sve_<SVE2_INT_BINARY_NARROWB:sve_int_op><mode>): Likewise. (@aarch64_sve_<SVE2_INT_BINARY_NARROWT:sve_int_op><mode>): Likewise. (<SHRNB:r>shrnb<mode>): Generalize to... (@aarch64_sve_<SVE2_INT_SHIFT_IMM_NARROWB:sve_int_op><mode>): ...this new pattern. (<SHRNT:r>shrnt<mode>): Generalize to... (@aarch64_sve_<SVE2_INT_SHIFT_IMM_NARROWT:sve_int_op><mode>): ...this new pattern. (@aarch64_pred_<SVE2_INT_BINARY_PAIR:sve_int_op><mode>): New pattern. (@aarch64_pred_<SVE2_FP_BINARY_PAIR:sve_fp_op><mode>): Likewise. (@cond_<SVE2_INT_BINARY_PAIR_LONG:sve_int_op><mode>): New expander. (*cond_<SVE2_INT_BINARY_PAIR_LONG:sve_int_op><mode>_2): New pattern. (*cond_<SVE2_INT_BINARY_PAIR_LONG:sve_int_op><mode>_z): Likewise. (@aarch64_sve_<SVE2_INT_CADD:optab><mode>): Likewise. (@aarch64_sve_<SVE2_INT_CMLA:optab><mode>): Likewise. (@aarch64_<SVE2_INT_CMLA:optab>_lane_<mode>): Likewise. (@aarch64_sve_<SVE2_INT_CDOT:optab><mode>): Likewise. (@aarch64_<SVE2_INT_CDOT:optab>_lane_<mode>): Likewise. (@aarch64_pred_<SVE2_COND_FP_UNARY_LONG:sve_fp_op><mode>): Likewise. (@cond_<SVE2_COND_FP_UNARY_LONG:sve_fp_op><mode>): New expander. (*cond_<SVE2_COND_FP_UNARY_LONG:sve_fp_op><mode>): New pattern. (@aarch64_sve2_cvtnt<mode>): Likewise. (@aarch64_pred_<SVE2_COND_FP_UNARY_NARROWB:sve_fp_op><mode>): Likewise. (@cond_<SVE2_COND_FP_UNARY_NARROWB:sve_fp_op><mode>): New expander. (*cond_<SVE2_COND_FP_UNARY_NARROWB:sve_fp_op><mode>_any): New pattern. (@aarch64_sve2_cvtxnt<mode>): Likewise. (@aarch64_pred_<SVE2_U32_UNARY:sve_int_op><mode>): Likewise. (@cond_<SVE2_U32_UNARY:sve_int_op><mode>): New expander. (*cond_<SVE2_U32_UNARY:sve_int_op><mode>): New pattern. (@aarch64_pred_<SVE2_COND_INT_UNARY_FP:sve_fp_op><mode>): Likewise. (@cond_<SVE2_COND_INT_UNARY_FP:sve_fp_op><mode>): New expander. (*cond_<SVE2_COND_INT_UNARY_FP:sve_fp_op><mode>): New pattern. (@aarch64_sve2_pmul<mode>): Likewise. (@aarch64_sve_<SVE2_PMULL:optab><mode>): Likewise. (@aarch64_sve_<SVE2_PMULL_PAIR:optab><mode>): Likewise. (@aarch64_sve2_tbl2<mode>): Likewise. (@aarch64_sve2_tbx<mode>): Likewise. (@aarch64_sve_<SVE2_INT_BITPERM:sve_int_op><mode>): Likewise. (@aarch64_sve2_histcnt<mode>): Likewise. (@aarch64_sve2_histseg<mode>): Likewise. (@aarch64_pred_<SVE2_MATCH:sve_int_op><mode>): Likewise. (*aarch64_pred_<SVE2_MATCH:sve_int_op><mode>_cc): Likewise. (*aarch64_pred_<SVE2_MATCH:sve_int_op><mode>_ptest): Likewise. (aarch64_sve2_aes<CRYPTO_AES:aes_op>): Likewise. 
(aarch64_sve2_aes<CRYPTO_AESMC:aesmc_op>): Likewise. (*aarch64_sve2_aese_fused, *aarch64_sve2_aesd_fused): Likewise. (aarch64_sve2_rax1, aarch64_sve2_sm4e, aarch64_sve2_sm4ekey): Likewise. (<su>mulh<r>s<mode>3): Update after above pattern name changes. * config/aarch64/iterators.md (VNx16QI_ONLY, VNx4SF_ONLY) (SVE_STRUCT2, SVE_FULL_BHI, SVE_FULL_HSI, SVE_FULL_HDI) (SVE2_PMULL_PAIR_I): New mode iterators. (UNSPEC_ADCLB, UNSPEC_ADCLT, UNSPEC_ADDHNB, UNSPEC_ADDHNT, UNSPEC_BDEP) (UNSPEC_BEXT, UNSPEC_BGRP, UNSPEC_CADD90, UNSPEC_CADD270, UNSPEC_CDOT) (UNSPEC_CDOT90, UNSPEC_CDOT180, UNSPEC_CDOT270, UNSPEC_CMLA) (UNSPEC_CMLA90, UNSPEC_CMLA180, UNSPEC_CMLA270, UNSPEC_COND_FCVTLT) (UNSPEC_COND_FCVTNT, UNSPEC_COND_FCVTX, UNSPEC_COND_FCVTXNT) (UNSPEC_COND_FLOGB, UNSPEC_EORBT, UNSPEC_EORTB, UNSPEC_FADDP) (UNSPEC_FMAXP, UNSPEC_FMAXNMP, UNSPEC_FMLALB, UNSPEC_FMLALT) (UNSPEC_FMLSLB, UNSPEC_FMLSLT, UNSPEC_FMINP, UNSPEC_FMINNMP) (UNSPEC_HISTCNT, UNSPEC_HISTSEG, UNSPEC_MATCH, UNSPEC_NMATCH) (UNSPEC_PMULLB, UNSPEC_PMULLB_PAIR, UNSPEC_PMULLT, UNSPEC_PMULLT_PAIR) (UNSPEC_RADDHNB, UNSPEC_RADDHNT, UNSPEC_RSUBHNB, UNSPEC_RSUBHNT) (UNSPEC_SLI, UNSPEC_SRI, UNSPEC_SABDLB, UNSPEC_SABDLT, UNSPEC_SADDLB) (UNSPEC_SADDLBT, UNSPEC_SADDLT, UNSPEC_SADDWB, UNSPEC_SADDWT) (UNSPEC_SBCLB, UNSPEC_SBCLT, UNSPEC_SMAXP, UNSPEC_SMINP) (UNSPEC_SQCADD90, UNSPEC_SQCADD270, UNSPEC_SQDMULLB, UNSPEC_SQDMULLBT) (UNSPEC_SQDMULLT, UNSPEC_SQRDCMLAH, UNSPEC_SQRDCMLAH90) (UNSPEC_SQRDCMLAH180, UNSPEC_SQRDCMLAH270, UNSPEC_SQRSHRNB) (UNSPEC_SQRSHRNT, UNSPEC_SQRSHRUNB, UNSPEC_SQRSHRUNT, UNSPEC_SQSHRNB) (UNSPEC_SQSHRNT, UNSPEC_SQSHRUNB, UNSPEC_SQSHRUNT, UNSPEC_SQXTNB) (UNSPEC_SQXTNT, UNSPEC_SQXTUNB, UNSPEC_SQXTUNT, UNSPEC_SSHLLB) (UNSPEC_SSHLLT, UNSPEC_SSUBLB, UNSPEC_SSUBLBT, UNSPEC_SSUBLT) (UNSPEC_SSUBLTB, UNSPEC_SSUBWB, UNSPEC_SSUBWT, UNSPEC_SUBHNB) (UNSPEC_SUBHNT, UNSPEC_TBL2, UNSPEC_UABDLB, UNSPEC_UABDLT) (UNSPEC_UADDLB, UNSPEC_UADDLT, UNSPEC_UADDWB, UNSPEC_UADDWT) (UNSPEC_UMAXP, UNSPEC_UMINP, UNSPEC_UQRSHRNB, UNSPEC_UQRSHRNT) (UNSPEC_UQSHRNB, UNSPEC_UQSHRNT, UNSPEC_UQXTNB, UNSPEC_UQXTNT) (UNSPEC_USHLLB, UNSPEC_USHLLT, UNSPEC_USUBLB, UNSPEC_USUBLT) (UNSPEC_USUBWB, UNSPEC_USUBWT): New unspecs. (UNSPEC_SMULLB, UNSPEC_SMULLT, UNSPEC_UMULLB, UNSPEC_UMULLT) (UNSPEC_SMULHS, UNSPEC_SMULHRS, UNSPEC_UMULHS, UNSPEC_UMULHRS) (UNSPEC_RSHRNB, UNSPEC_RSHRNT, UNSPEC_SHRNB, UNSPEC_SHRNT): Move further down file. (VNARROW, Ventype): New mode attributes. (Vewtype): Handle VNx2DI. Fix typo in comment. (VDOUBLE): New mode attribute. (sve_lane_con): Handle VNx8HI. (SVE_INT_UNARY): Include ss_abs and ss_neg for TARGET_SVE2. (SVE_INT_BINARY): Likewise ss_plus, us_plus, ss_minus and us_minus. (sve_int_op, sve_int_op_rev): Handle the above codes. (sve_pred_int_rhs2_operand): Likewise. (MULLBT, SHRNB, SHRNT): Delete. (SVE_INT_SHIFT_IMM): New int iterator. (SVE_WHILE): Add UNSPEC_WHILEGE, UNSPEC_WHILEGT, UNSPEC_WHILEHI and UNSPEC_WHILEHS for TARGET_SVE2. (SVE2_U32_UNARY, SVE2_INT_UNARY_NARROWB, SVE2_INT_UNARY_NARROWT) (SVE2_INT_BINARY, SVE2_INT_BINARY_LANE, SVE2_INT_BINARY_LONG) (SVE2_INT_BINARY_LONG_LANE, SVE2_INT_BINARY_NARROWB) (SVE2_INT_BINARY_NARROWT, SVE2_INT_BINARY_PAIR, SVE2_FP_BINARY_PAIR) (SVE2_INT_BINARY_PAIR_LONG, SVE2_INT_BINARY_WIDE): New int iterators. (SVE2_INT_SHIFT_IMM_LONG, SVE2_INT_SHIFT_IMM_NARROWB): Likewise. (SVE2_INT_SHIFT_IMM_NARROWT, SVE2_INT_SHIFT_INSERT, SVE2_INT_CADD) (SVE2_INT_BITPERM, SVE2_INT_TERNARY, SVE2_INT_TERNARY_LANE): Likewise. 
(SVE2_FP_TERNARY_LONG, SVE2_FP_TERNARY_LONG_LANE, SVE2_INT_CMLA) (SVE2_INT_CDOT, SVE2_INT_ADD_BINARY_LONG, SVE2_INT_QADD_BINARY_LONG) (SVE2_INT_SUB_BINARY_LONG, SVE2_INT_QSUB_BINARY_LONG): Likewise. (SVE2_INT_ADD_BINARY_LONG_LANE, SVE2_INT_QADD_BINARY_LONG_LANE) (SVE2_INT_SUB_BINARY_LONG_LANE, SVE2_INT_QSUB_BINARY_LONG_LANE) (SVE2_COND_INT_UNARY_FP, SVE2_COND_FP_UNARY_LONG): Likewise. (SVE2_COND_FP_UNARY_NARROWB, SVE2_COND_INT_BINARY): Likewise. (SVE2_COND_INT_BINARY_NOREV, SVE2_COND_INT_BINARY_REV): Likewise. (SVE2_COND_INT_SHIFT, SVE2_MATCH, SVE2_PMULL): Likewise. (optab): Handle the new unspecs. (su, r): Remove entries for UNSPEC_SHRNB, UNSPEC_SHRNT, UNSPEC_RSHRNB and UNSPEC_RSHRNT. (lr): Handle the new unspecs. (bt): Delete. (cmp_op, while_optab_cmp, sve_int_op): Handle the new unspecs. (sve_int_op_rev, sve_int_add_op, sve_int_qadd_op, sve_int_sub_op) (sve_int_qsub_op): New int attributes. (sve_fp_op, rot): Handle the new unspecs. * config/aarch64/aarch64-sve-builtins.h (function_resolver::require_matching_pointer_type): Declare. (function_resolver::resolve_unary): Add an optional boolean argument. (function_resolver::finish_opt_n_resolution): Add an optional type_suffix_index argument. (gimple_folder::redirect_call): Declare. (gimple_expander::prepare_gather_address_operands): Add an optional bool parameter. * config/aarch64/aarch64-sve-builtins.cc: Include aarch64-sve-builtins-sve2.h. (TYPES_b_unsigned, TYPES_b_integer, TYPES_bh_integer): New macros. (TYPES_bs_unsigned, TYPES_hs_signed, TYPES_hs_integer): Likewise. (TYPES_hd_unsigned, TYPES_hsd_signed): Likewise. (TYPES_hsd_integer): Use TYPES_hsd_signed. (TYPES_s_float_hsd_integer, TYPES_s_float_sd_integer): New macros. (TYPES_s_unsigned): Likewise. (TYPES_s_integer): Use TYPES_s_unsigned. (TYPES_sd_signed, TYPES_sd_unsigned): New macros. (TYPES_sd_integer): Use them. (TYPES_d_unsigned): New macro. (TYPES_d_integer): Use it. (TYPES_d_data, TYPES_cvt_long, TYPES_cvt_narrow_s): New macros. (TYPES_cvt_narrow): Likewise. (DEF_SVE_TYPES_ARRAY): Include the new types macros above. (preds_mx): New variable. (function_builder::add_overloaded_function): Allow the new feature set to be more restrictive than the original one. (function_resolver::infer_pointer_type): Remove qualifiers from the pointer type before printing it. (function_resolver::require_matching_pointer_type): New function. (function_resolver::resolve_sv_displacement): Handle functions that don't support 32-bit vector indices or svint32_t vector offsets. (function_resolver::finish_opt_n_resolution): Take the inferred type as a separate argument. (function_resolver::resolve_unary): Optionally treat all forms in the same way as normal merging functions. (gimple_folder::redirect_call): New function. (function_expander::prepare_gather_address_operands): Add an argument that says whether scaled forms are available. If they aren't, handle scaling of vector indices and don't add the extension and scaling operands. (function_expander::map_to_unspecs): If aarch64_sve isn't available, fall back to using cond_* instead. * config/aarch64/aarch64-sve-builtins-functions.h (rtx_code_function): Split out the member variables into... (rtx_code_function_base): ...this new base class. (rtx_code_function_rotated): Inherit rtx_code_function_base. (unspec_based_function): Split out the member variables into... (unspec_based_function_base): ...this new base class. (unspec_based_function_rotated): Inherit unspec_based_function_base. (unspec_based_function_exact_insn): New class. 
(unspec_based_add_function, unspec_based_add_lane_function) (unspec_based_lane_function, unspec_based_pred_function) (unspec_based_qadd_function, unspec_based_qadd_lane_function) (unspec_based_qsub_function, unspec_based_qsub_lane_function) (unspec_based_sub_function, unspec_based_sub_lane_function): New typedefs. (unspec_based_fused_function): New class. (unspec_based_mla_function, unspec_based_mls_function): New typedefs. (unspec_based_fused_lane_function): New class. (unspec_based_mla_lane_function, unspec_based_mls_lane_function): New typedefs. (CODE_FOR_MODE1): New macro. (fixed_insn_function): New class. (while_comparison): Likewise. * config/aarch64/aarch64-sve-builtins-shapes.h (binary_long_lane) (binary_long_opt_n, binary_narrowb_opt_n, binary_narrowt_opt_n) (binary_to_uint, binary_wide, binary_wide_opt_n, compare, compare_ptr) (load_ext_gather_index_restricted, load_ext_gather_offset_restricted) (load_gather_sv_restricted, shift_left_imm_long): Declare. (shift_left_imm_to_uint, shift_right_imm_narrowb): Likewise. (shift_right_imm_narrowt, shift_right_imm_narrowb_to_uint): Likewise. (shift_right_imm_narrowt_to_uint, store_scatter_index_restricted) (store_scatter_offset_restricted, tbl_tuple, ternary_long_lane) (ternary_long_opt_n, ternary_qq_lane_rotate, ternary_qq_rotate) (ternary_shift_left_imm, ternary_shift_right_imm, ternary_uint) (unary_convert_narrowt, unary_long, unary_narrowb, unary_narrowt) (unary_narrowb_to_uint, unary_narrowt_to_uint, unary_to_int): Likewise. * config/aarch64/aarch64-sve-builtins-shapes.cc (apply_predication): Also add an initial argument for unary_convert_narrowt, regardless of the predication type. (build_32_64): Allow loads and stores to specify MODE_none. (build_sv_index64, build_sv_uint_offset): New functions. (long_type_suffix): New function. (binary_imm_narrowb_base, binary_imm_narrowt_base): New classes. (binary_imm_long_base, load_gather_sv_base): Likewise. (shift_right_imm_narrow_wrapper, ternary_shift_imm_base): Likewise. (ternary_resize2_opt_n_base, ternary_resize2_lane_base): Likewise. (unary_narrowb_base, unary_narrowt_base): Likewise. (binary_long_lane_def, binary_long_lane): New shape. (binary_long_opt_n_def, binary_long_opt_n): Likewise. (binary_narrowb_opt_n_def, binary_narrowb_opt_n): Likewise. (binary_narrowt_opt_n_def, binary_narrowt_opt_n): Likewise. (binary_to_uint_def, binary_to_uint): Likewise. (binary_wide_def, binary_wide): Likewise. (binary_wide_opt_n_def, binary_wide_opt_n): Likewise. (compare_def, compare): Likewise. (compare_ptr_def, compare_ptr): Likewise. (load_ext_gather_index_restricted_def, load_ext_gather_index_restricted): Likewise. (load_ext_gather_offset_restricted_def, load_ext_gather_offset_restricted): Likewise. (load_gather_sv_def): Inherit from load_gather_sv_base. (load_gather_sv_restricted_def, load_gather_sv_restricted): New shape. (shift_left_imm_def, shift_left_imm): Likewise. (shift_left_imm_long_def, shift_left_imm_long): Likewise. (shift_left_imm_to_uint_def, shift_left_imm_to_uint): Likewise. (store_scatter_index_restricted_def, store_scatter_index_restricted): Likewise. (store_scatter_offset_restricted_def, store_scatter_offset_restricted): Likewise. (tbl_tuple_def, tbl_tuple): Likewise. (ternary_long_lane_def, ternary_long_lane): Likewise. (ternary_long_opt_n_def, ternary_long_opt_n): Likewise. (ternary_qq_lane_def): Inherit from ternary_resize2_lane_base. (ternary_qq_lane_rotate_def, ternary_qq_lane_rotate): New shape (ternary_qq_opt_n_def): Inherit from ternary_resize2_opt_n_base. 
(ternary_qq_rotate_def, ternary_qq_rotate): New shape. (ternary_shift_left_imm_def, ternary_shift_left_imm): Likewise. (ternary_shift_right_imm_def, ternary_shift_right_imm): Likewise. (ternary_uint_def, ternary_uint): Likewise. (unary_convert): Fix typo in comment. (unary_convert_narrowt_def, unary_convert_narrowt): New shape. (unary_long_def, unary_long): Likewise. (unary_narrowb_def, unary_narrowb): Likewise. (unary_narrowt_def, unary_narrowt): Likewise. (unary_narrowb_to_uint_def, unary_narrowb_to_uint): Likewise. (unary_narrowt_to_uint_def, unary_narrowt_to_uint): Likewise. (unary_to_int_def, unary_to_int): Likewise. * config/aarch64/aarch64-sve-builtins-base.cc (unspec_cmla) (unspec_fcmla, unspec_cond_fcmla, expand_mla_mls_lane): New functions. (svasrd_impl): Delete. (svcadd_impl::expand): Handle integer operations too. (svcmla_impl::expand, svcmla_lane::expand): Likewise, using the new functions to derive the unspec numbers. (svmla_svmls_lane_impl): Replace with... (svmla_lane_impl, svmls_lane_impl): ...these new classes. Handle integer operations too. (svwhile_impl): Rename to... (svwhilelx_impl): ...this and inherit from while_comparison. (svasrd): Use unspec_based_function. (svmla_lane): Use svmla_lane_impl. (svmls_lane): Use svmls_lane_impl. (svrecpe, svrsqrte): Handle unsigned integer operations too. (svwhilele, svwhilelt): Use svwhilelx_impl. * config/aarch64/aarch64-sve-builtins-sve2.h: New file. * config/aarch64/aarch64-sve-builtins-sve2.cc: Likewise. * config/aarch64/aarch64-sve-builtins-sve2.def: Likewise. * config/aarch64/aarch64-sve-builtins.def: Include aarch64-sve-builtins-sve2.def. gcc/testsuite/ * g++.target/aarch64/sve/acle/general-c++/mul_lane_1.c: New test. * g++.target/aarch64/sve2/acle: New directory. * gcc.target/aarch64/pragma_cpp_predefs_3.c: New test. * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h (TEST_TYPE_CHANGE_Z) (TEST_DUAL_ZD, TEST_TYPE_CHANGE_ZX, TEST_TBL2, TEST_TBL2_REV): New macros. * gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c: Do not expect an error saying that the function has no f32 form, but instead expect an error about SVE2 being required if the current target doesn't support SVE2. * gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c: Likewise. * gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c Likewise. 
* gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c, * gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c, * gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c, * gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c, * gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/binary_wide_1.c, * gcc.target/aarch64/sve/acle/general-c/binary_wide_opt_n_1.c, * gcc.target/aarch64/sve/acle/general-c/compare_1.c, * gcc.target/aarch64/sve/acle/general-c/compare_ptr_1.c, * gcc.target/aarch64/sve/acle/general-c/load_ext_gather_index_restricted_1.c, * gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_restricted_1.c, * gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_restricted_2.c, * gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_restricted_3.c, * gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_restricted_4.c, * gcc.target/aarch64/sve/acle/general-c/load_gather_sv_restricted_1.c, * gcc.target/aarch64/sve/acle/general-c/load_gather_sv_restricted_2.c, * gcc.target/aarch64/sve/acle/general-c/mul_lane_1.c, * gcc.target/aarch64/sve/acle/general-c/shift_left_imm_long_1.c, * gcc.target/aarch64/sve/acle/general-c/shift_left_imm_to_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/shift_left_imm_to_uint_2.c, * gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_1.c, * gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_to_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_1.c, * gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_to_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/store_scatter_index_restricted_1.c, * gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_restricted_1.c, * gcc.target/aarch64/sve/acle/general-c/tbl_tuple_1.c, * gcc.target/aarch64/sve/acle/general-c/ternary_long_lane_1.c, * gcc.target/aarch64/sve/acle/general-c/ternary_long_opt_n_1.c, * gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_rotate_1.c, * gcc.target/aarch64/sve/acle/general-c/ternary_qq_rotate_1.c, * gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c, * gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/unary_convert_narrowt_1.c, * gcc.target/aarch64/sve/acle/general-c/unary_narrowb_1.c, * gcc.target/aarch64/sve/acle/general-c/unary_narrowb_to_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/unary_narrowt_1.c, * gcc.target/aarch64/sve/acle/general-c/unary_narrowt_to_uint_1.c, * gcc.target/aarch64/sve/acle/general-c/unary_to_int_1.c: New tests. * gcc.target/aarch64/sve2/bcax_1.c: Likewise. * gcc.target/aarch64/sve2/acle: New directory. From-SVN: r280060
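
As a quick illustration of what this support enables (not part of the patch itself): user code built with an SVE2-enabled -march can test the new feature macro before using SVE2 intrinsics from <arm_sve.h>. The sketch below assumes the standard ACLE spelling of svaba; treat the exact intrinsic name and -march string as illustrative and check them against the ACLE documentation.

#include <arm_sve.h>

#ifndef __ARM_FEATURE_SVE2
#error "This file requires SVE2, e.g. -march=armv8.2-a+sve2"
#endif

/* Accumulate the absolute difference of B and C into A (SVE2 SABA).  */
svint32_t
abs_diff_accumulate (svint32_t a, svint32_t b, svint32_t c)
{
  return svaba_s32 (a, b, c);
}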
Diffstat (limited to 'gcc/config/aarch64')
-rw-r--r--  gcc/config/aarch64/aarch64-c.c                       |    5
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.cc      |  207
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-functions.h  |  278
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-shapes.cc    | 1048
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-shapes.h     |   35
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-sve2.cc      |  654
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-sve2.def     |  214
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-sve2.h       |  191
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.cc           |  291
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.def          |    1
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.h            |   10
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md                    |   43
-rw-r--r--  gcc/config/aarch64/aarch64-sve2.md                   | 2129
-rw-r--r--  gcc/config/aarch64/aarch64.h                         |   16
-rw-r--r--  gcc/config/aarch64/aarch64.md                        |    6
-rw-r--r--  gcc/config/aarch64/iterators.md                      |  703
-rw-r--r--  gcc/config/aarch64/t-aarch64                         |   19
17 files changed, 5542 insertions, 308 deletions
diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index 9ccca42..b422530 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -150,6 +150,11 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
builtin_define_with_int_value ("__ARM_FEATURE_SVE_BITS", bits);
}
aarch64_def_or_undef (TARGET_SVE2, "__ARM_FEATURE_SVE2", pfile);
+ aarch64_def_or_undef (TARGET_SVE2_AES, "__ARM_FEATURE_SVE2_AES", pfile);
+ aarch64_def_or_undef (TARGET_SVE2_BITPERM,
+ "__ARM_FEATURE_SVE2_BITPERM", pfile);
+ aarch64_def_or_undef (TARGET_SVE2_SHA3, "__ARM_FEATURE_SVE2_SHA3", pfile);
+ aarch64_def_or_undef (TARGET_SVE2_SM4, "__ARM_FEATURE_SVE2_SM4", pfile);
aarch64_def_or_undef (TARGET_LSE, "__ARM_FEATURE_ATOMICS", pfile);
aarch64_def_or_undef (TARGET_AES, "__ARM_FEATURE_AES", pfile);
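
The four new macros above allow compile-time dispatch on the optional SVE2 extensions. A hedged sketch of user-side usage (svbdep_u32 is the ACLE name for the BITPERM bit-deposit intrinsic; the exact spelling is illustrative and not part of this hunk):

#include <arm_sve.h>

#ifdef __ARM_FEATURE_SVE2_BITPERM
svuint32_t
deposit_bits (svuint32_t data, svuint32_t mask)
{
  /* BDEP: scatter the low bits of DATA into the bit positions set in MASK.  */
  return svbdep_u32 (data, mask);
}
#endif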
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index e6145b4..16a7898 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -49,6 +49,48 @@ using namespace aarch64_sve;
namespace {
+/* Return the UNSPEC_CMLA* unspec for rotation amount ROT. */
+static int
+unspec_cmla (int rot)
+{
+ switch (rot)
+ {
+ case 0: return UNSPEC_CMLA;
+ case 90: return UNSPEC_CMLA90;
+ case 180: return UNSPEC_CMLA180;
+ case 270: return UNSPEC_CMLA270;
+ default: gcc_unreachable ();
+ }
+}
+
+/* Return the UNSPEC_FCMLA* unspec for rotation amount ROT. */
+static int
+unspec_fcmla (int rot)
+{
+ switch (rot)
+ {
+ case 0: return UNSPEC_FCMLA;
+ case 90: return UNSPEC_FCMLA90;
+ case 180: return UNSPEC_FCMLA180;
+ case 270: return UNSPEC_FCMLA270;
+ default: gcc_unreachable ();
+ }
+}
+
+/* Return the UNSPEC_COND_FCMLA* unspec for rotation amount ROT. */
+static int
+unspec_cond_fcmla (int rot)
+{
+ switch (rot)
+ {
+ case 0: return UNSPEC_COND_FCMLA;
+ case 90: return UNSPEC_COND_FCMLA90;
+ case 180: return UNSPEC_COND_FCMLA180;
+ case 270: return UNSPEC_COND_FCMLA270;
+ default: gcc_unreachable ();
+ }
+}
+
/* Expand a call to svmad, or svmla after reordering its operands.
Make _m forms merge with argument MERGE_ARGNO. */
static rtx
@@ -69,6 +111,19 @@ expand_mad (function_expander &e,
return e.use_cond_insn (icode, merge_argno);
}
+/* Expand a call to svmla_lane or svmls_lane using floating-point unspec
+ UNSPEC. */
+static rtx
+expand_mla_mls_lane (function_expander &e, int unspec)
+{
+ /* Put the operands in the normal (fma ...) order, with the accumulator
+ last. This fits naturally since that's also the unprinted operand
+ in the asm output. */
+ e.rotate_inputs_left (0, 4);
+ insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
+ return e.use_exact_insn (icode);
+}
+
/* Expand a call to svmsb, or svmls after reordering its operands.
Make _m forms merge with argument MERGE_ARGNO. */
static rtx
@@ -172,16 +227,6 @@ public:
unsigned int m_shift;
};
-class svasrd_impl : public function_base
-{
-public:
- rtx
- expand (function_expander &e) const OVERRIDE
- {
- return e.use_cond_insn (code_for_cond_asrd (e.vector_mode (0)));
- }
-};
-
class svbic_impl : public function_base
{
public:
@@ -248,12 +293,14 @@ public:
expand (function_expander &e) const OVERRIDE
{
/* Convert the rotation amount into a specific unspec. */
- int rot = INTVAL (e.args[3]);
- e.args.ordered_remove (3);
- int unspec = (rot == 90 ? UNSPEC_COND_FCADD90
- : rot == 270 ? UNSPEC_COND_FCADD270
- : (gcc_unreachable (), 0));
- return e.map_to_unspecs (-1, -1, unspec);
+ int rot = INTVAL (e.args.pop ());
+ if (rot == 90)
+ return e.map_to_unspecs (UNSPEC_CADD90, UNSPEC_CADD90,
+ UNSPEC_COND_FCADD90);
+ if (rot == 270)
+ return e.map_to_unspecs (UNSPEC_CADD270, UNSPEC_CADD270,
+ UNSPEC_COND_FCADD270);
+ gcc_unreachable ();
}
};
@@ -288,18 +335,19 @@ public:
expand (function_expander &e) const OVERRIDE
{
/* Convert the rotation amount into a specific unspec. */
- int rot = INTVAL (e.args[4]);
- e.args.ordered_remove (4);
- int unspec = (rot == 0 ? UNSPEC_COND_FCMLA
- : rot == 90 ? UNSPEC_COND_FCMLA90
- : rot == 180 ? UNSPEC_COND_FCMLA180
- : rot == 270 ? UNSPEC_COND_FCMLA270
- : (gcc_unreachable (), 0));
-
- /* Make the operand order the same as the one used by the fma optabs,
- with the accumulator last. */
- e.rotate_inputs_left (1, 4);
- return e.map_to_unspecs (-1, -1, unspec, 3);
+ int rot = INTVAL (e.args.pop ());
+ if (e.type_suffix (0).float_p)
+ {
+ /* Make the operand order the same as the one used by the fma optabs,
+ with the accumulator last. */
+ e.rotate_inputs_left (1, 4);
+ return e.map_to_unspecs (-1, -1, unspec_cond_fcmla (rot), 3);
+ }
+ else
+ {
+ int cmla = unspec_cmla (rot);
+ return e.map_to_unspecs (cmla, cmla, -1);
+ }
}
};
@@ -310,19 +358,21 @@ public:
expand (function_expander &e) const OVERRIDE
{
/* Convert the rotation amount into a specific unspec. */
- int rot = INTVAL (e.args[4]);
- e.args.ordered_remove (4);
- int unspec = (rot == 0 ? UNSPEC_FCMLA
- : rot == 90 ? UNSPEC_FCMLA90
- : rot == 180 ? UNSPEC_FCMLA180
- : rot == 270 ? UNSPEC_FCMLA270
- : (gcc_unreachable (), 0));
-
- /* Make the operand order the same as the one used by the fma optabs,
- with the accumulator last. */
- e.rotate_inputs_left (0, 4);
- insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
- return e.use_exact_insn (icode);
+ int rot = INTVAL (e.args.pop ());
+ machine_mode mode = e.vector_mode (0);
+ if (e.type_suffix (0).float_p)
+ {
+ /* Make the operand order the same as the one used by the fma optabs,
+ with the accumulator last. */
+ e.rotate_inputs_left (0, 4);
+ insn_code icode = code_for_aarch64_lane (unspec_fcmla (rot), mode);
+ return e.use_exact_insn (icode);
+ }
+ else
+ {
+ insn_code icode = code_for_aarch64_lane (unspec_cmla (rot), mode);
+ return e.use_exact_insn (icode);
+ }
}
};
@@ -1376,26 +1426,19 @@ public:
}
};
-/* Base class for svmla_lane and svmls_lane. */
-class svmla_svmls_lane_impl : public function_base
+class svmla_lane_impl : public function_base
{
public:
- CONSTEXPR svmla_svmls_lane_impl (int unspec)
- : m_unspec (unspec) {}
-
rtx
expand (function_expander &e) const OVERRIDE
{
- /* Put the operands in the normal (fma ...) order, with the accumulator
- last. This fits naturally since that's also the unprinted operand
- in the asm output. */
- e.rotate_inputs_left (0, 4);
- insn_code icode = code_for_aarch64_lane (m_unspec, e.vector_mode (0));
- return e.use_exact_insn (icode);
+ if (e.type_suffix (0).integer_p)
+ {
+ machine_mode mode = e.vector_mode (0);
+ return e.use_exact_insn (code_for_aarch64_sve_add_mul_lane (mode));
+ }
+ return expand_mla_mls_lane (e, UNSPEC_FMLA);
}
-
- /* The unspec code associated with the operation. */
- int m_unspec;
};
class svmls_impl : public function_base
@@ -1433,6 +1476,21 @@ public:
}
};
+class svmls_lane_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ if (e.type_suffix (0).integer_p)
+ {
+ machine_mode mode = e.vector_mode (0);
+ return e.use_exact_insn (code_for_aarch64_sve_sub_mul_lane (mode));
+ }
+ return expand_mla_mls_lane (e, UNSPEC_FMLS);
+ }
+};
+
class svmsb_impl : public function_base
{
public:
@@ -2275,12 +2333,11 @@ public:
};
/* A function_base for svwhilele and svwhilelt functions. */
-class svwhile_impl : public function_base
+class svwhilelx_impl : public while_comparison
{
public:
- CONSTEXPR svwhile_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p)
- : m_unspec_for_sint (unspec_for_sint),
- m_unspec_for_uint (unspec_for_uint), m_eq_p (eq_p)
+ CONSTEXPR svwhilelx_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p)
+ : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
{}
/* Try to fold a call by treating its arguments as constants of type T. */
@@ -2342,24 +2399,6 @@ public:
return fold_type<poly_int64> (f);
}
- rtx
- expand (function_expander &e) const OVERRIDE
- {
- /* Suffix 0 determines the predicate mode, suffix 1 determines the
- scalar mode and signedness. */
- int unspec = (e.type_suffix (1).unsigned_p
- ? m_unspec_for_uint
- : m_unspec_for_sint);
- machine_mode pred_mode = e.vector_mode (0);
- scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1));
- return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode));
- }
-
- /* The unspec codes associated with signed and unsigned operations
- respectively. */
- int m_unspec_for_sint;
- int m_unspec_for_uint;
-
/* True svwhilele, false for svwhilelt. */
bool m_eq_p;
};
@@ -2428,7 +2467,7 @@ FUNCTION (svand, rtx_code_function, (AND, AND))
FUNCTION (svandv, reduction, (UNSPEC_ANDV))
FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE))
-FUNCTION (svasrd, svasrd_impl,)
+FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1))
FUNCTION (svbic, svbic_impl,)
FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA))
FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB))
@@ -2554,9 +2593,9 @@ FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM))
FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV))
FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV))
FUNCTION (svmla, svmla_impl,)
-FUNCTION (svmla_lane, svmla_svmls_lane_impl, (UNSPEC_FMLA))
+FUNCTION (svmla_lane, svmla_lane_impl,)
FUNCTION (svmls, svmls_impl,)
-FUNCTION (svmls_lane, svmla_svmls_lane_impl, (UNSPEC_FMLS))
+FUNCTION (svmls_lane, svmls_lane_impl,)
FUNCTION (svmov, svmov_impl,)
FUNCTION (svmsb, svmsb_impl,)
FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL))
@@ -2613,7 +2652,7 @@ FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode))
FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1))
FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1))
FUNCTION (svrdffr, svrdffr_impl,)
-FUNCTION (svrecpe, unspec_based_function, (-1, -1, UNSPEC_FRECPE))
+FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE))
FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS))
FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX))
FUNCTION (svreinterpret, svreinterpret_impl,)
@@ -2628,7 +2667,7 @@ FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN))
FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP))
FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX))
FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ))
-FUNCTION (svrsqrte, unspec_based_function, (-1, -1, UNSPEC_RSQRTE))
+FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE))
FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS))
FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE))
FUNCTION (svsel, svsel_impl,)
@@ -2666,8 +2705,8 @@ FUNCTION (svunpkhi, svunpk_impl, (true))
FUNCTION (svunpklo, svunpk_impl, (false))
FUNCTION (svuzp1, svuzp_impl, (0))
FUNCTION (svuzp2, svuzp_impl, (1))
-FUNCTION (svwhilele, svwhile_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true))
-FUNCTION (svwhilelt, svwhile_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false))
+FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true))
+FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false))
FUNCTION (svwrffr, svwrffr_impl,)
FUNCTION (svzip1, svzip_impl, (0))
FUNCTION (svzip2, svzip_impl, (1))
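
For background (again, not part of the patch): the svwhilelx_impl/while_comparison split above keeps a single copy of the expansion logic for all "while" comparisons, including the new SVE2 svwhilege/svwhilegt/svwhilehi/svwhilehs forms. The loop below, a minimal sketch using only long-standing base-SVE intrinsics, shows the predicate-from-bounds pattern these builtins implement.

#include <arm_sve.h>

/* A[i] += B[i] for i in [0, N).  The WHILELT-generated predicate covers
   the final partial vector, so no scalar tail loop is needed.  */
void
vector_add (float *a, const float *b, int n)
{
  for (int i = 0; i < n; i += (int) svcntw ())
    {
      svbool_t pg = svwhilelt_b32_s32 (i, n);
      svfloat32_t va = svld1_f32 (pg, a + i);
      svfloat32_t vb = svld1_f32 (pg, b + i);
      svst1_f32 (pg, a + i, svadd_f32_x (pg, va, vb));
    }
}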
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
index 9782176..71a3943 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
@@ -173,23 +173,18 @@ public:
scalar_int_mode m_to_mode;
};
-/* A function_base for functions that have an associated rtx code.
- It supports all forms of predication except PRED_implicit. */
-class rtx_code_function : public function_base
+/* An incomplete function_base for functions that have an associated rtx code.
+ It simply records information about the mapping for derived classes
+ to use. */
+class rtx_code_function_base : public function_base
{
public:
- CONSTEXPR rtx_code_function (rtx_code code_for_sint, rtx_code code_for_uint,
- int unspec_for_fp = -1)
+ CONSTEXPR rtx_code_function_base (rtx_code code_for_sint,
+ rtx_code code_for_uint,
+ int unspec_for_fp = -1)
: m_code_for_sint (code_for_sint), m_code_for_uint (code_for_uint),
m_unspec_for_fp (unspec_for_fp) {}
- rtx
- expand (function_expander &e) const OVERRIDE
- {
- return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
- m_unspec_for_fp);
- }
-
/* The rtx code to use for signed and unsigned integers respectively.
Can be UNKNOWN for functions that don't have integer forms. */
rtx_code m_code_for_sint;
@@ -200,18 +195,34 @@ public:
int m_unspec_for_fp;
};
+/* A function_base for functions that have an associated rtx code.
+ It supports all forms of predication except PRED_implicit. */
+class rtx_code_function : public rtx_code_function_base
+{
+public:
+ CONSTEXPR rtx_code_function (rtx_code code_for_sint, rtx_code code_for_uint,
+ int unspec_for_fp = -1)
+ : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {}
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
+ m_unspec_for_fp);
+ }
+};
+
/* Like rtx_code_function, but for functions that take what is normally
the final argument first. One use of this class is to handle binary
reversed operations; another is to handle MLA-style operations that
are normally expressed in GCC as MAD-style operations. */
-class rtx_code_function_rotated : public function_base
+class rtx_code_function_rotated : public rtx_code_function_base
{
public:
CONSTEXPR rtx_code_function_rotated (rtx_code code_for_sint,
rtx_code code_for_uint,
int unspec_for_fp = -1)
- : m_code_for_sint (code_for_sint), m_code_for_uint (code_for_uint),
- m_unspec_for_fp (unspec_for_fp) {}
+ : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {}
rtx
expand (function_expander &e) const OVERRIDE
@@ -223,27 +234,48 @@ public:
return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
m_unspec_for_fp, nargs - 1);
}
+};
- /* The rtx code to use for signed and unsigned integers respectively.
- Can be UNKNOWN for functions that don't have integer forms. */
- rtx_code m_code_for_sint;
- rtx_code m_code_for_uint;
+/* An incomplete function_base for functions that have an associated
+ unspec code, with separate codes for signed integers, unsigned
+ integers and floating-point values. The class simply records
+ information about the mapping for derived classes to use. */
+class unspec_based_function_base : public function_base
+{
+public:
+ CONSTEXPR unspec_based_function_base (int unspec_for_sint,
+ int unspec_for_uint,
+ int unspec_for_fp)
+ : m_unspec_for_sint (unspec_for_sint),
+ m_unspec_for_uint (unspec_for_uint),
+ m_unspec_for_fp (unspec_for_fp)
+ {}
- /* The UNSPEC_COND_* to use for floating-point operations. Can be -1
- for functions that only operate on integers. */
+ /* Return the unspec code to use for INSTANCE, based on type suffix 0. */
+ int
+ unspec_for (const function_instance &instance) const
+ {
+ return (!instance.type_suffix (0).integer_p ? m_unspec_for_fp
+ : instance.type_suffix (0).unsigned_p ? m_unspec_for_uint
+ : m_unspec_for_sint);
+ }
+
+ /* The unspec code associated with signed-integer, unsigned-integer
+ and floating-point operations respectively. */
+ int m_unspec_for_sint;
+ int m_unspec_for_uint;
int m_unspec_for_fp;
};
/* A function_base for functions that have an associated unspec code.
It supports all forms of predication except PRED_implicit. */
-class unspec_based_function : public function_base
+class unspec_based_function : public unspec_based_function_base
{
public:
CONSTEXPR unspec_based_function (int unspec_for_sint, int unspec_for_uint,
int unspec_for_fp)
- : m_unspec_for_sint (unspec_for_sint),
- m_unspec_for_uint (unspec_for_uint),
- m_unspec_for_fp (unspec_for_fp)
+ : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
+ unspec_for_fp)
{}
rtx
@@ -252,27 +284,20 @@ public:
return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint,
m_unspec_for_fp);
}
-
- /* The unspec code associated with signed-integer, unsigned-integer
- and floating-point operations respectively. */
- int m_unspec_for_sint;
- int m_unspec_for_uint;
- int m_unspec_for_fp;
};
/* Like unspec_based_function, but for functions that take what is normally
the final argument first. One use of this class is to handle binary
reversed operations; another is to handle MLA-style operations that
are normally expressed in GCC as MAD-style operations. */
-class unspec_based_function_rotated : public function_base
+class unspec_based_function_rotated : public unspec_based_function_base
{
public:
CONSTEXPR unspec_based_function_rotated (int unspec_for_sint,
int unspec_for_uint,
int unspec_for_fp)
- : m_unspec_for_sint (unspec_for_sint),
- m_unspec_for_uint (unspec_for_uint),
- m_unspec_for_fp (unspec_for_fp)
+ : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
+ unspec_for_fp)
{}
rtx
@@ -285,13 +310,138 @@ public:
return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint,
m_unspec_for_fp, nargs - 1);
}
+};
- /* The unspec code associated with signed-integer, unsigned-integer
- and floating-point operations respectively. */
- int m_unspec_for_sint;
- int m_unspec_for_uint;
- int m_unspec_for_fp;
+/* Like unspec_based_function, but map the function directly to
+ CODE (UNSPEC, M) instead of using the generic predication-based
+ expansion, where M is the vector mode associated with type suffix 0.
+ This is useful if the unspec doesn't describe the full operation or
+ if the usual predication rules don't apply for some reason. */
+template<insn_code (*CODE) (int, machine_mode)>
+class unspec_based_function_exact_insn : public unspec_based_function_base
+{
+public:
+ CONSTEXPR unspec_based_function_exact_insn (int unspec_for_sint,
+ int unspec_for_uint,
+ int unspec_for_fp)
+ : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
+ unspec_for_fp)
+ {}
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ return e.use_exact_insn (CODE (unspec_for (e), e.vector_mode (0)));
+ }
+};
+
+/* A function that performs an unspec and then adds it to another value. */
+typedef unspec_based_function_exact_insn<code_for_aarch64_sve_add>
+ unspec_based_add_function;
+typedef unspec_based_function_exact_insn<code_for_aarch64_sve_add_lane>
+ unspec_based_add_lane_function;
+
+/* Generic unspec-based _lane function. */
+typedef unspec_based_function_exact_insn<code_for_aarch64_sve_lane>
+ unspec_based_lane_function;
+
+/* A function that uses aarch64_pred* patterns regardless of the
+ predication type. */
+typedef unspec_based_function_exact_insn<code_for_aarch64_pred>
+ unspec_based_pred_function;
+
+/* Like unspec_based_add_function and unspec_based_add_lane_function,
+ but using saturating addition. */
+typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qadd>
+ unspec_based_qadd_function;
+typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qadd_lane>
+ unspec_based_qadd_lane_function;
+
+/* Like unspec_based_sub_function and unspec_based_sub_lane_function,
+ but using saturating subtraction. */
+typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qsub>
+ unspec_based_qsub_function;
+typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qsub_lane>
+ unspec_based_qsub_lane_function;
+
+/* A function that performs an unspec and then subtracts it from
+ another value. */
+typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub>
+ unspec_based_sub_function;
+typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub_lane>
+ unspec_based_sub_lane_function;
+
+/* A function that acts like unspec_based_function_exact_insn<INT_CODE>
+ when operating on integers, but that expands to an (fma ...)-style
+ aarch64_sve* operation when applied to floats. */
+template<insn_code (*INT_CODE) (int, machine_mode)>
+class unspec_based_fused_function : public unspec_based_function_base
+{
+public:
+ CONSTEXPR unspec_based_fused_function (int unspec_for_sint,
+ int unspec_for_uint,
+ int unspec_for_fp)
+ : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
+ unspec_for_fp)
+ {}
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ int unspec = unspec_for (e);
+ insn_code icode;
+ if (e.type_suffix (0).float_p)
+ {
+ /* Put the operands in the normal (fma ...) order, with the accumulator
+ last. This fits naturally since that's also the unprinted operand
+ in the asm output. */
+ e.rotate_inputs_left (0, e.pred != PRED_none ? 4 : 3);
+ icode = code_for_aarch64_sve (unspec, e.vector_mode (0));
+ }
+ else
+ icode = INT_CODE (unspec, e.vector_mode (0));
+ return e.use_exact_insn (icode);
+ }
+};
+typedef unspec_based_fused_function<code_for_aarch64_sve_add>
+ unspec_based_mla_function;
+typedef unspec_based_fused_function<code_for_aarch64_sve_sub>
+ unspec_based_mls_function;
+
+/* Like unspec_based_fused_function, but for _lane functions. */
+template<insn_code (*INT_CODE) (int, machine_mode)>
+class unspec_based_fused_lane_function : public unspec_based_function_base
+{
+public:
+ CONSTEXPR unspec_based_fused_lane_function (int unspec_for_sint,
+ int unspec_for_uint,
+ int unspec_for_fp)
+ : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
+ unspec_for_fp)
+ {}
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ int unspec = unspec_for (e);
+ insn_code icode;
+ if (e.type_suffix (0).float_p)
+ {
+ /* Put the operands in the normal (fma ...) order, with the accumulator
+ last. This fits naturally since that's also the unprinted operand
+ in the asm output. */
+ e.rotate_inputs_left (0, e.pred != PRED_none ? 5 : 4);
+ icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
+ }
+ else
+ icode = INT_CODE (unspec, e.vector_mode (0));
+ return e.use_exact_insn (icode);
+ }
};
+typedef unspec_based_fused_lane_function<code_for_aarch64_sve_add_lane>
+ unspec_based_mla_lane_function;
+typedef unspec_based_fused_lane_function<code_for_aarch64_sve_sub_lane>
+ unspec_based_mls_lane_function;
/* A function_base that uses CODE_FOR_MODE (M) to get the associated
instruction code, where M is the vector mode associated with type
@@ -311,11 +461,31 @@ public:
mode associated with the first type suffix. */
#define CODE_FOR_MODE0(PATTERN) code_for_mode_function<code_for_##PATTERN, 0>
+/* Likewise for the second type suffix. */
+#define CODE_FOR_MODE1(PATTERN) code_for_mode_function<code_for_##PATTERN, 1>
+
/* Like CODE_FOR_MODE0, but the function doesn't raise exceptions when
operating on floating-point data. */
#define QUIET_CODE_FOR_MODE0(PATTERN) \
quiet< code_for_mode_function<code_for_##PATTERN, 0> >
+/* A function_base for functions that always expand to a fixed insn pattern,
+ regardless of what the suffixes are. */
+class fixed_insn_function : public function_base
+{
+public:
+ CONSTEXPR fixed_insn_function (insn_code code) : m_code (code) {}
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ return e.use_exact_insn (m_code);
+ }
+
+ /* The instruction to use. */
+ insn_code m_code;
+};
+
/* A function_base for functions that permute their arguments. */
class permute : public quiet<function_base>
{
@@ -456,6 +626,34 @@ public:
rtx_code m_code;
};
+/* A function_base for svwhile* functions. */
+class while_comparison : public function_base
+{
+public:
+ CONSTEXPR while_comparison (int unspec_for_sint, int unspec_for_uint)
+ : m_unspec_for_sint (unspec_for_sint),
+ m_unspec_for_uint (unspec_for_uint)
+ {}
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ /* Suffix 0 determines the predicate mode, suffix 1 determines the
+ scalar mode and signedness. */
+ int unspec = (e.type_suffix (1).unsigned_p
+ ? m_unspec_for_uint
+ : m_unspec_for_sint);
+ machine_mode pred_mode = e.vector_mode (0);
+ scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1));
+ return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode));
+ }
+
+ /* The unspec codes associated with signed and unsigned operations
+ respectively. */
+ int m_unspec_for_sint;
+ int m_unspec_for_uint;
+};
+
}
/* Declare the global function base NAME, creating it from an instance
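
A hedged example of how one of the new classes gets used (the actual FUNCTION entries live in the new aarch64-sve-builtins-sve2.cc added by this patch; the insn name below is inferred from the aarch64_sve2_aes<CRYPTO_AESMC:aesmc_op> pattern in the ChangeLog and may not match the file exactly):

/* svaesmc maps to a single fixed pattern whatever the type suffixes are,
   so fixed_insn_function is enough; no unspec dispatch is needed.  */
FUNCTION (svaesmc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesmc))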
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index 69a0621..b047abf 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -63,8 +63,11 @@ apply_predication (const function_instance &instance, tree return_type,
{
argument_types.quick_insert (0, get_svbool_t ());
/* For unary merge operations, the first argument is a vector with
- the same type as the result. */
- if (argument_types.length () == 2 && instance.pred == PRED_m)
+ the same type as the result. For unary_convert_narrowt it also
+ provides the "bottom" half of active elements, and is present
+ for all types of predication. */
+ if ((argument_types.length () == 2 && instance.pred == PRED_m)
+ || instance.shape == shapes::unary_convert_narrowt)
argument_types.quick_insert (0, return_type);
}
}
@@ -286,13 +289,17 @@ build_one (function_builder &b, const char *signature,
group.required_extensions, force_direct_overloads);
}
-/* Add a function instance for every type and predicate combination
- in GROUP, which describes some sort of gather or scatter operation.
- If the function has any type suffixes (as for loads and stores),
- the first function type suffix specifies either a 32-bit or a 64-bit
- type; use MODE32 for the former and MODE64 for the latter. If the
- function has no type suffixes (as for prefetches), add one MODE32 form
- and one MODE64 form for each predication type.
+/* GROUP describes some sort of gather or scatter operation. There are
+ two cases:
+
+ - If the function has any type suffixes (as for loads and stores), the
+ first function type suffix specifies either a 32-bit or a 64-bit type,
+ which in turn selects either MODE32 or MODE64 as the addressing mode.
+ Add a function instance for every type and predicate combination
+ in GROUP for which the associated addressing mode is not MODE_none.
+
+ - If the function has no type suffixes (as for prefetches), add one
+ MODE32 form and one MODE64 form for each predication type.
The other arguments are as for build_all. */
static void
@@ -303,6 +310,7 @@ build_32_64 (function_builder &b, const char *signature,
for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
if (group.types[0][0] == NUM_TYPE_SUFFIXES)
{
+ gcc_assert (mode32 != MODE_none && mode64 != MODE_none);
build_one (b, signature, group, mode32, 0, pi,
force_direct_overloads);
build_one (b, signature, group, mode64, 0, pi,
@@ -314,8 +322,9 @@ build_32_64 (function_builder &b, const char *signature,
unsigned int bits = type_suffixes[group.types[ti][0]].element_bits;
gcc_assert (bits == 32 || bits == 64);
mode_suffix_index mode = bits == 32 ? mode32 : mode64;
- build_one (b, signature, group, mode, ti, pi,
- force_direct_overloads);
+ if (mode != MODE_none)
+ build_one (b, signature, group, mode, ti, pi,
+ force_direct_overloads);
}
}
@@ -332,6 +341,15 @@ build_sv_index (function_builder &b, const char *signature,
build_32_64 (b, signature, group, MODE_u32index, MODE_u64index);
}
+/* Like build_sv_index, but only handle 64-bit types. */
+static void
+build_sv_index64 (function_builder &b, const char *signature,
+ const function_group_info &group)
+{
+ build_32_64 (b, signature, group, MODE_none, MODE_s64index);
+ build_32_64 (b, signature, group, MODE_none, MODE_u64index);
+}
+
/* Like build_sv_index, but taking vector byte offsets instead of vector
array indices. */
static void
@@ -342,6 +360,16 @@ build_sv_offset (function_builder &b, const char *signature,
build_32_64 (b, signature, group, MODE_u32offset, MODE_u64offset);
}
+/* Like build_sv_offset, but exclude offsets that must be interpreted
+ as signed (i.e. s32offset). */
+static void
+build_sv_uint_offset (function_builder &b, const char *signature,
+ const function_group_info &group)
+{
+ build_32_64 (b, signature, group, MODE_none, MODE_s64offset);
+ build_32_64 (b, signature, group, MODE_u32offset, MODE_u64offset);
+}
+
/* For every type and predicate combination in GROUP, add a function
that takes a vector base address and no displacement. The vector
base has the same element size as the first type suffix.
@@ -397,6 +425,21 @@ build_all (function_builder &b, const char *signature,
force_direct_overloads);
}
+/* TYPE is the largest type suffix associated with the arguments of R,
+ but the result is twice as wide. Return the associated type suffix
+ if it exists, otherwise report an appropriate error and return
+ NUM_TYPE_SUFFIXES. */
+static type_suffix_index
+long_type_suffix (function_resolver &r, type_suffix_index type)
+{
+ unsigned int element_bits = type_suffixes[type].element_bits;
+ if (type_suffixes[type].integer_p && element_bits < 64)
+ return find_type_suffix (type_suffixes[type].tclass, element_bits * 2);
+
+ r.report_no_such_form (type);
+ return NUM_TYPE_SUFFIXES;
+}
+
/* Declare the function shape NAME, pointing it to an instance
of class <NAME>_def. */
#define SHAPE(NAME) \
@@ -449,6 +492,94 @@ struct adr_base : public overloaded_base<0>
};
};
+/* Base class for narrowing bottom binary functions that take an
+ immediate second operand. The result is half the size of the input
+ and has class CLASS. */
+template<type_class_index CLASS = function_resolver::SAME_TYPE_CLASS>
+struct binary_imm_narrowb_base : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_n);
+ STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS
+ || CLASS == TYPE_unsigned);
+ if (CLASS == TYPE_unsigned)
+ build_all (b, "vhu0,v0,su64", group, MODE_n);
+ else
+ build_all (b, "vh0,v0,su64", group, MODE_n);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ return r.resolve_uniform (1, 1);
+ }
+};
+
+/* The top equivalent of binary_imm_narrowb_base. It takes three arguments,
+ with the first being the values of the even elements, which are typically
+ the result of the narrowb operation. */
+template<type_class_index CLASS = function_resolver::SAME_TYPE_CLASS>
+struct binary_imm_narrowt_base : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_n);
+ STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS
+ || CLASS == TYPE_unsigned);
+ if (CLASS == TYPE_unsigned)
+ build_all (b, "vhu0,vhu0,v0,su64", group, MODE_n);
+ else
+ build_all (b, "vh0,vh0,v0,su64", group, MODE_n);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (3, i, nargs)
+ || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES
+ || !r.require_derived_vector_type (i, i + 1, type, CLASS, r.HALF_SIZE)
+ || !r.require_integer_immediate (i + 2))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+
+/* Base class for long (i.e. narrow op narrow -> wide) binary functions
+ that take an immediate second operand. The type suffix specifies
+ the wider type. */
+struct binary_imm_long_base : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_n);
+ build_all (b, "v0,vh0,su64", group, MODE_n);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type, result_type;
+ if (!r.check_gp_argument (2, i, nargs)
+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+ || !r.require_integer_immediate (i + 1)
+ || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ if (tree res = r.lookup_form (r.mode_suffix_id, result_type))
+ return res;
+
+ return r.report_no_such_form (type);
+ }
+};
+
/* Base class for inc_dec and inc_dec_pat. */
struct inc_dec_base : public overloaded_base<0>
{
@@ -518,6 +649,26 @@ struct load_contiguous_base : public overloaded_base<0>
}
};
+/* Base class for gather loads that take a scalar base and a vector
+ displacement (either an offset or an index). */
+struct load_gather_sv_base : public overloaded_base<0>
+{
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ mode_suffix_index mode;
+ type_suffix_index type;
+ if (!r.check_gp_argument (2, i, nargs)
+ || (type = r.infer_pointer_type (i, true)) == NUM_TYPE_SUFFIXES
+ || (mode = r.resolve_sv_displacement (i + 1, type, true),
+ mode == MODE_none))
+ return error_mark_node;
+
+ return r.resolve_to (mode, type);
+ }
+};
+
/* Base class for load_ext_gather_index and load_ext_gather_offset,
which differ only in the units of the displacement. */
struct load_ext_gather_base : public overloaded_base<1>
@@ -578,6 +729,19 @@ struct prefetch_gather_base : public overloaded_base<0>
}
};
+/* Wraps BASE to provide a narrowing shift right function. Argument N
+ is an immediate shift amount in the range [1, sizeof(<t0>_t) * 4]. */
+template<typename BASE, unsigned int N>
+struct shift_right_imm_narrow_wrapper : public BASE
+{
+ bool
+ check (function_checker &c) const OVERRIDE
+ {
+ unsigned int bits = c.type_suffix (0).element_bits / 2;
+ return c.require_immediate_range (N, 1, bits);
+ }
+};
+
/* Base class for store_scatter_index and store_scatter_offset,
which differ only in the units of the displacement. */
struct store_scatter_base : public overloaded_base<0>
@@ -607,6 +771,128 @@ struct store_scatter_base : public overloaded_base<0>
}
};
+/* Base class for ternary operations in which the final argument is an
+ immediate shift amount. The derived class should check the range. */
+struct ternary_shift_imm_base : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_n);
+ build_all (b, "v0,v0,v0,su64", group, MODE_n);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ return r.resolve_uniform (2, 1);
+ }
+};
+
+/* Base class for ternary operations in which the first argument has the
+ same element type as the result, and in which the second and third
+ arguments have an element type that is derived from the first. MODIFIER
+ is the number of element bits in the second and third arguments,
+ or a function_resolver modifier that says how this precision is
+ derived from the first argument's elements. */
+template<unsigned int MODIFIER>
+struct ternary_resize2_opt_n_base : public overloaded_base<0>
+{
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (3, i, nargs)
+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+ || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS,
+ MODIFIER))
+ return error_mark_node;
+
+ return r.finish_opt_n_resolution (i + 2, i, type, r.SAME_TYPE_CLASS,
+ MODIFIER);
+ }
+};
+
+/* Like ternary_resize2_opt_n_base, but for functions that take a final
+ lane argument. */
+template<unsigned int MODIFIER>
+struct ternary_resize2_lane_base : public overloaded_base<0>
+{
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (4, i, nargs)
+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+ || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS,
+ MODIFIER)
+ || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS,
+ MODIFIER)
+ || !r.require_integer_immediate (i + 3))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+
+/* Base class for narrowing bottom unary functions. The result is half
+ the size of the input and has class CLASS. */
+template<type_class_index CLASS = function_resolver::SAME_TYPE_CLASS>
+struct unary_narrowb_base : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS
+ || CLASS == TYPE_unsigned);
+ if (CLASS == TYPE_unsigned)
+ build_all (b, "vhu0,v0", group, MODE_none);
+ else
+ build_all (b, "vh0,v0", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ return r.resolve_unary (CLASS, r.HALF_SIZE);
+ }
+};
+
+/* The top equivalent of unary_narrowb_base. All forms take the values
+ of the even elements as an extra argument, before any governing predicate.
+ These even elements are typically the result of the narrowb operation. */
+template<type_class_index CLASS = function_resolver::SAME_TYPE_CLASS>
+struct unary_narrowt_base : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS
+ || CLASS == TYPE_unsigned);
+ if (CLASS == TYPE_unsigned)
+ build_all (b, "vhu0,vhu0,v0", group, MODE_none);
+ else
+ build_all (b, "vh0,vh0,v0", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (2, i, nargs)
+ || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES
+ || !r.require_derived_vector_type (i, i + 1, type, CLASS, r.HALF_SIZE))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+
/* sv<m0>_t svfoo[_m0base]_[m1]index(sv<m0>_t, sv<m1>_t)
for all valid combinations of vector base type <m0> and vector
@@ -719,6 +1005,73 @@ struct binary_lane_def : public overloaded_base<0>
};
SHAPE (binary_lane)
+/* sv<t0>_t svfoo[_t0](sv<t0:half>_t, sv<t0:half>_t, uint64_t).
+
+ where the final argument is an integer constant expression in the
+ range [0, 32 / sizeof (<t0>_t) - 1]. */
+struct binary_long_lane_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,vh0,vh0,su64", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type, result_type;
+ if (!r.check_gp_argument (3, i, nargs)
+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+ || !r.require_matching_vector_type (i + 1, type)
+ || !r.require_integer_immediate (i + 2)
+ || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ if (tree res = r.lookup_form (r.mode_suffix_id, result_type))
+ return res;
+
+ return r.report_no_such_form (type);
+ }
+
+ bool
+ check (function_checker &c) const OVERRIDE
+ {
+ return c.require_immediate_lane_index (2);
+ }
+};
+SHAPE (binary_long_lane)
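For illustration, svmullb_lane (declared later in this patch) is one intrinsic of this form; a minimal user-level sketch, with an invented wrapper name and an SVE2 target assumed:

  #include <arm_sve.h>

  /* Multiply the even 32-bit lanes of X by the 32-bit lane of Y chosen by
     the index. For the s64 instance the index must be a constant in
     [0, 32 / sizeof (int64_t) - 1], i.e. [0, 3].  */
  svint64_t
  mul_long_by_lane (svint32_t x, svint32_t y)
  {
    return svmullb_lane_s64 (x, y, 1);
  }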
+
+/* sv<t0>_t svfoo[_t0](sv<t0:half>_t, sv<t0:half>_t)
+ sv<t0>_t svfoo[_n_t0](sv<t0:half>_t, <t0:half>_t). */
+struct binary_long_opt_n_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,vh0,vh0", group, MODE_none);
+ build_all (b, "v0,vh0,sh0", group, MODE_n);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type, result_type;
+ if (!r.check_gp_argument (2, i, nargs)
+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+ || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ return r.finish_opt_n_resolution (i + 1, i, type, r.SAME_TYPE_CLASS,
+ r.SAME_SIZE, result_type);
+ }
+};
+SHAPE (binary_long_opt_n)
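svaddlb (also declared later in this patch) is one function with this shape; as a rough sketch (wrapper names invented, SVE2 target assumed), the s16 instance widens the even 8-bit lanes and adds them:

  #include <arm_sve.h>

  /* Vector and _n (scalar second operand) forms of the same operation:
     add the sign-extended even 8-bit lanes, giving 16-bit results.  */
  svint16_t
  add_long_even (svint8_t a, svint8_t b)
  {
    return svaddlb_s16 (a, b);
  }

  svint16_t
  add_long_even_n (svint8_t a, int8_t b)
  {
    return svaddlb_n_s16 (a, b);
  }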
+
/* sv<t0>_t svfoo[_n_t0](sv<t0>_t, <t0>_t).
i.e. a binary operation in which the final argument is always a scalar
@@ -747,6 +1100,59 @@ struct binary_n_def : public overloaded_base<0>
};
SHAPE (binary_n)
+/* sv<t0:half>_t svfoo[_t0](sv<t0>_t, sv<t0>_t)
+ sv<t0:half>_t svfoo[_n_t0](sv<t0>_t, <t0>_t)
+
+ i.e. a version of binary_opt_n in which the output elements are half the
+ width of the input elements. */
+struct binary_narrowb_opt_n_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "vh0,v0,v0", group, MODE_none);
+ build_all (b, "vh0,v0,s0", group, MODE_n);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ return r.resolve_uniform_opt_n (2);
+ }
+};
+SHAPE (binary_narrowb_opt_n)
+
+/* sv<t0:half>_t svfoo[_t0](sv<t0:half>_t, sv<t0>_t, sv<t0>_t)
+ sv<t0:half>_t svfoo[_n_t0](sv<t0:half>_t, sv<t0>_t, <t0>_t)
+
+ This is the "top" counterpart to binary_narrowb_opt_n. */
+struct binary_narrowt_opt_n_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "vh0,vh0,v0,v0", group, MODE_none);
+ build_all (b, "vh0,vh0,v0,s0", group, MODE_n);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (3, i, nargs)
+ || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES
+ || !r.require_derived_vector_type (i, i + 1, type, r.SAME_TYPE_CLASS,
+ r.HALF_SIZE))
+ return error_mark_node;
+
+ return r.finish_opt_n_resolution (i + 2, i + 1, type);
+ }
+};
+SHAPE (binary_narrowt_opt_n)
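The svaddhnb/svaddhnt pair is a typical user of these two shapes; a hedged sketch (invented wrapper name, SVE2 target assumed) of how the bottom and top forms combine:

  #include <arm_sve.h>

  /* Add 16-bit lanes and keep the high half of each sum as an 8-bit
     value: svaddhnb fills the even result lanes, svaddhnt then fills
     the odd lanes around them.  */
  svint8_t
  add_high_narrow (svint16_t a, svint16_t b, svint16_t c, svint16_t d)
  {
    svint8_t even = svaddhnb_s16 (a, b);
    return svaddhnt_s16 (even, c, d);
  }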
+
/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t)
sv<t0>_t svfoo[_n_t0](sv<t0>_t, <t0>_t)
@@ -827,6 +1233,26 @@ struct binary_scalar_def : public nonoverloaded_base
};
SHAPE (binary_scalar)
+/* sv<t0:uint>_t svfoo[_t0](sv<t0>_t, sv<t0>_t).
+
+ i.e. a version of "binary" that returns unsigned integers. */
+struct binary_to_uint_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "vu0,v0,v0", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ return r.resolve_uniform (2);
+ }
+};
+SHAPE (binary_to_uint)
+
/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:uint>_t)
i.e. a version of "binary" in which the final argument is always an
@@ -969,6 +1395,59 @@ struct binary_uint64_opt_n_def : public overloaded_base<0>
};
SHAPE (binary_uint64_opt_n)
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:half>_t). */
+struct binary_wide_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vh0", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (2, i, nargs)
+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+ || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS,
+ r.HALF_SIZE))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+SHAPE (binary_wide)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:half>_t)
+ sv<t0>_t svfoo[_n_t0](sv<t0>_t, <t0:half>_t). */
+struct binary_wide_opt_n_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vh0", group, MODE_none);
+ build_all (b, "v0,v0,sh0", group, MODE_n);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (2, i, nargs)
+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ return r.finish_opt_n_resolution (i + 1, i, type, r.SAME_TYPE_CLASS,
+ r.HALF_SIZE);
+ }
+};
+SHAPE (binary_wide_opt_n)
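svaddwb has this shape; a small sketch (invented wrapper name, SVE2 target assumed) of the s16 instance:

  #include <arm_sve.h>

  /* Add the sign-extended even 8-bit lanes of B to the 16-bit lanes of A;
     svaddwb_n_s16 would accept a scalar int8_t instead of B.  */
  svint16_t
  add_wide (svint16_t a, svint8_t b)
  {
    return svaddwb_s16 (a, b);
  }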
+
/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t)
<t0>_t svfoo[_n_t0](<t0>_t, sv<t0>_t). */
struct clast_def : public overloaded_base<0>
@@ -1009,6 +1488,24 @@ struct clast_def : public overloaded_base<0>
};
SHAPE (clast)
+/* svbool_t svfoo[_t0](sv<t0>_t, sv<t0>_t). */
+struct compare_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "vp,v0,v0", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ return r.resolve_uniform (2);
+ }
+};
+SHAPE (compare)
+
/* svbool_t svfoo[_t0](sv<t0>_t, sv<t0>_t)
svbool_t svfoo[_n_t0](sv<t0>_t, <t0>_t)
@@ -1031,6 +1528,31 @@ struct compare_opt_n_def : public overloaded_base<0>
};
SHAPE (compare_opt_n)
+/* svbool_t svfoo[_t0](const <t0>_t *, const <t0>_t *). */
+struct compare_ptr_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "vp,al,al", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (2, i, nargs)
+ || (type = r.infer_pointer_type (i)) == NUM_TYPE_SUFFIXES
+ || !r.require_matching_pointer_type (i + 1, i, type))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+SHAPE (compare_ptr)
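svwhilerw and svwhilewr (implemented later in this patch) use this shape; a rough sketch of the pointer-comparison form, with an invented wrapper name and an SVE2 target assumed:

  #include <arm_sve.h>
  #include <stdint.h>

  /* Return a predicate whose active lanes cover the leading elements that
     can be processed without a read-after-write hazard between the two
     int32_t pointers.  */
  svbool_t
  safe_lanes (const int32_t *src, const int32_t *dst)
  {
    return svwhilerw_s32 (src, dst);
  }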
+
/* svbool_t svfoo_t0[_t1](<t1>_t, <t1>_t)
where _t0 is a _b<bits> suffix that describes the predicate result.
@@ -1456,6 +1978,26 @@ struct load_ext_gather_index_def : public load_ext_gather_base
};
SHAPE (load_ext_gather_index)
+/* sv<t0>_t svfoo_[s64]index_t0(const <X>_t *, svint64_t)
+ sv<t0>_t svfoo_[u64]index_t0(const <X>_t *, svuint64_t)
+
+ sv<t0>_t svfoo[_u32base]_index_t0(svuint32_t, int64_t)
+ sv<t0>_t svfoo[_u64base]_index_t0(svuint64_t, int64_t)
+
+ where <X> is determined by the function base name. This is
+ load_ext_gather_index that doesn't support 32-bit vector indices. */
+struct load_ext_gather_index_restricted_def : public load_ext_gather_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_index);
+ build_sv_index64 (b, "t0,al,d", group);
+ build_vs_index (b, "t0,b,ss64", group);
+ }
+};
+SHAPE (load_ext_gather_index_restricted)
+
/* sv<t0>_t svfoo_[s32]offset_t0(const <X>_t *, svint32_t)
sv<t0>_t svfoo_[s64]offset_t0(const <X>_t *, svint64_t)
sv<t0>_t svfoo_[u32]offset_t0(const <X>_t *, svuint32_t)
@@ -1481,6 +2023,31 @@ struct load_ext_gather_offset_def : public load_ext_gather_base
};
SHAPE (load_ext_gather_offset)
+/* sv<t0>_t svfoo_[s64]offset_t0(const <X>_t *, svint64_t)
+ sv<t0>_t svfoo_[u32]offset_t0(const <X>_t *, svuint32_t)
+ sv<t0>_t svfoo_[u64]offset_t0(const <X>_t *, svuint64_t)
+
+ sv<t0>_t svfoo[_u32base]_t0(svuint32_t)
+ sv<t0>_t svfoo[_u64base]_t0(svuint64_t)
+
+ sv<t0>_t svfoo[_u32base]_offset_t0(svuint32_t, int64_t)
+ sv<t0>_t svfoo[_u64base]_offset_t0(svuint64_t, int64_t)
+
+ where <X> is determined by the function base name. This is
+ load_ext_gather_offset without the s32 vector offset form. */
+struct load_ext_gather_offset_restricted_def : public load_ext_gather_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_offset);
+ build_sv_uint_offset (b, "t0,al,d", group);
+ build_v_base (b, "t0,b", group, true);
+ build_vs_offset (b, "t0,b,ss64", group);
+ }
+};
+SHAPE (load_ext_gather_offset_restricted)
+
/* sv<t0>_t svfoo_[s32]index[_t0](const <t0>_t *, svint32_t)
sv<t0>_t svfoo_[s64]index[_t0](const <t0>_t *, svint64_t)
sv<t0>_t svfoo_[u32]index[_t0](const <t0>_t *, svuint32_t)
@@ -1490,7 +2057,7 @@ SHAPE (load_ext_gather_offset)
sv<t0>_t svfoo_[s64]offset[_t0](const <t0>_t *, svint64_t)
sv<t0>_t svfoo_[u32]offset[_t0](const <t0>_t *, svuint32_t)
sv<t0>_t svfoo_[u64]offset[_t0](const <t0>_t *, svuint64_t). */
-struct load_gather_sv_def : public overloaded_base<0>
+struct load_gather_sv_def : public load_gather_sv_base
{
void
build (function_builder &b, const function_group_info &group) const OVERRIDE
@@ -1500,23 +2067,30 @@ struct load_gather_sv_def : public overloaded_base<0>
build_sv_index (b, "t0,al,d", group);
build_sv_offset (b, "t0,al,d", group);
}
+};
+SHAPE (load_gather_sv)
- tree
- resolve (function_resolver &r) const OVERRIDE
- {
- unsigned int i, nargs;
- mode_suffix_index mode;
- type_suffix_index type;
- if (!r.check_gp_argument (2, i, nargs)
- || (type = r.infer_pointer_type (i, true)) == NUM_TYPE_SUFFIXES
- || (mode = r.resolve_sv_displacement (i + 1, type, true),
- mode == MODE_none))
- return error_mark_node;
+/* sv<t0>_t svfoo_[u32]index[_t0](const <t0>_t *, svuint32_t)
+ sv<t0>_t svfoo_[u64]index[_t0](const <t0>_t *, svuint64_t)
- return r.resolve_to (mode, type);
+ sv<t0>_t svfoo_[s64]offset[_t0](const <t0>_t *, svint64_t)
+ sv<t0>_t svfoo_[u32]offset[_t0](const <t0>_t *, svuint32_t)
+ sv<t0>_t svfoo_[u64]offset[_t0](const <t0>_t *, svuint64_t)
+
+ This is load_gather_sv without the 32-bit vector index forms and
+ without the s32 vector offset form. */
+struct load_gather_sv_restricted_def : public load_gather_sv_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_index);
+ b.add_overloaded_functions (group, MODE_offset);
+ build_sv_index64 (b, "t0,al,d", group);
+ build_sv_uint_offset (b, "t0,al,d", group);
}
};
-SHAPE (load_gather_sv)
+SHAPE (load_gather_sv_restricted)
/* sv<t0>_t svfoo[_u32base]_t0(svuint32_t)
sv<t0>_t svfoo[_u64base]_t0(svuint64_t)
@@ -1748,6 +2322,64 @@ SHAPE (setffr)
/* sv<t0>_t svfoo[_n_t0](sv<t0>_t, uint64_t)
where the final argument must be an integer constant expression in the
+ range [0, sizeof (<t0>_t) * 8 - 1]. */
+struct shift_left_imm_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_n);
+ build_all (b, "v0,v0,su64", group, MODE_n);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ return r.resolve_uniform (1, 1);
+ }
+
+ bool
+ check (function_checker &c) const OVERRIDE
+ {
+ unsigned int bits = c.type_suffix (0).element_bits;
+ return c.require_immediate_range (1, 0, bits - 1);
+ }
+};
+SHAPE (shift_left_imm)
+
+/* sv<t0>_t svfoo[_n_t0](sv<t0:half>_t, uint64_t)
+
+ where the final argument must be an integer constant expression in the
+ range [0, sizeof (<t0>_t) * 4 - 1]. */
+struct shift_left_imm_long_def : public binary_imm_long_base
+{
+ bool
+ check (function_checker &c) const OVERRIDE
+ {
+ unsigned int bits = c.type_suffix (0).element_bits / 2;
+ return c.require_immediate_range (1, 0, bits - 1);
+ }
+};
+SHAPE (shift_left_imm_long)
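The SVE2 svshllb intrinsic is one likely user of this shape; a minimal sketch (invented wrapper name, SVE2 target assumed) of the s16 instance, whose immediate range is [0, sizeof (int16_t) * 4 - 1], i.e. [0, 7]:

  #include <arm_sve.h>

  /* Sign-extend the even 8-bit lanes to 16 bits and shift left by 2.  */
  svint16_t
  widen_even_lanes (svint8_t x)
  {
    return svshllb_n_s16 (x, 2);
  }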
+
+/* sv<t0:uint>_t svfoo[_n_t0](sv<t0>_t, uint64_t)
+
+ where the final argument must be an integer constant expression in the
+ range [0, sizeof (<t0>_t) * 8 - 1]. */
+struct shift_left_imm_to_uint_def : public shift_left_imm_def
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_n);
+ build_all (b, "vu0,v0,su64", group, MODE_n);
+ }
+};
+SHAPE (shift_left_imm_to_uint)
+
+/* sv<t0>_t svfoo[_n_t0](sv<t0>_t, uint64_t)
+
+ where the final argument must be an integer constant expression in the
range [1, sizeof (<t0>_t) * 8]. */
struct shift_right_imm_def : public overloaded_base<0>
{
@@ -1773,6 +2405,42 @@ struct shift_right_imm_def : public overloaded_base<0>
};
SHAPE (shift_right_imm)
+/* sv<t0:half>_t svfoo[_n_t0](sv<t0>_t, uint64_t)
+
+ where the final argument must be an integer constant expression in the
+ range [1, sizeof (<t0>_t) * 4]. */
+typedef shift_right_imm_narrow_wrapper<binary_imm_narrowb_base<>, 1>
+ shift_right_imm_narrowb_def;
+SHAPE (shift_right_imm_narrowb)
+
+/* sv<t0:half>_t svfoo[_n_t0](sv<t0:half>_t, sv<t0>_t, uint64_t)
+
+ where the final argument must be an integer constant expression in the
+ range [1, sizeof (<t0>_t) * 4]. */
+typedef shift_right_imm_narrow_wrapper<binary_imm_narrowt_base<>, 2>
+ shift_right_imm_narrowt_def;
+SHAPE (shift_right_imm_narrowt)
+
+/* sv<t0:uint:half>_t svfoo[_n_t0](sv<t0>_t, uint64_t)
+
+ where the final argument must be an integer constant expression in the
+ range [1, sizeof (<t0>_t) * 4]. */
+typedef binary_imm_narrowb_base<TYPE_unsigned>
+ binary_imm_narrowb_base_unsigned;
+typedef shift_right_imm_narrow_wrapper<binary_imm_narrowb_base_unsigned, 1>
+ shift_right_imm_narrowb_to_uint_def;
+SHAPE (shift_right_imm_narrowb_to_uint)
+
+/* sv<t0:uint:half>_t svfoo[_n_t0](sv<t0:uint:half>_t, sv<t0>_t, uint64_t)
+
+ where the final argument must be an integer constant expression in the
+ range [1, sizeof (<t0>_t) * 4]. */
+typedef binary_imm_narrowt_base<TYPE_unsigned>
+ binary_imm_narrowt_base_unsigned;
+typedef shift_right_imm_narrow_wrapper<binary_imm_narrowt_base_unsigned, 2>
+ shift_right_imm_narrowt_to_uint_def;
+SHAPE (shift_right_imm_narrowt_to_uint)
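The narrowing right-shift pair svshrnb/svshrnt illustrates the narrowb/narrowt forms above; a hedged sketch (invented wrapper name, SVE2 target assumed), with the immediate constrained to [1, 8] for the _n_s16 instances:

  #include <arm_sve.h>

  /* Shift 16-bit lanes right by 4 and keep the low 8 bits: svshrnb writes
     the even result lanes, svshrnt fills the odd lanes from a second input
     while preserving the even ones.  */
  svint8_t
  shift_narrow_pair (svint16_t lo, svint16_t hi)
  {
    svint8_t even = svshrnb_n_s16 (lo, 4);
    return svshrnt_n_s16 (even, hi, 4);
  }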
+
/* void svfoo[_t0](<X>_t *, sv<t0>[xN]_t)
void svfoo_vnum[_t0](<X>_t *, int64_t, sv<t0>[xN]_t)
@@ -1830,6 +2498,26 @@ struct store_scatter_index_def : public store_scatter_base
};
SHAPE (store_scatter_index)
+/* void svfoo_[s64]index[_t0](<X>_t *, svint64_t, sv<t0>_t)
+ void svfoo_[u64]index[_t0](<X>_t *, svuint64_t, sv<t0>_t)
+
+ void svfoo[_u32base]_index[_t0](svuint32_t, int64_t, sv<t0>_t)
+ void svfoo[_u64base]_index[_t0](svuint64_t, int64_t, sv<t0>_t)
+
+ i.e. a version of store_scatter_index that doesn't support 32-bit
+ vector indices. */
+struct store_scatter_index_restricted_def : public store_scatter_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_index);
+ build_sv_index64 (b, "_,as,d,t0", group);
+ build_vs_index (b, "_,b,ss64,t0", group);
+ }
+};
+SHAPE (store_scatter_index_restricted)
+
/* void svfoo_[s32]offset[_t0](<X>_t *, svint32_t, sv<t0>_t)
void svfoo_[s64]offset[_t0](<X>_t *, svint64_t, sv<t0>_t)
void svfoo_[u32]offset[_t0](<X>_t *, svuint32_t, sv<t0>_t)
@@ -1857,6 +2545,57 @@ struct store_scatter_offset_def : public store_scatter_base
};
SHAPE (store_scatter_offset)
+/* void svfoo_[s64]offset[_t0](<X>_t *, svint64_t, sv<t0>_t)
+ void svfoo_[u32]offset[_t0](<X>_t *, svuint32_t, sv<t0>_t)
+ void svfoo_[u64]offset[_t0](<X>_t *, svuint64_t, sv<t0>_t)
+
+ void svfoo[_u32base_t0](svuint32_t, sv<t0>_t)
+ void svfoo[_u64base_t0](svuint64_t, sv<t0>_t)
+
+ void svfoo[_u32base]_offset[_t0](svuint32_t, int64_t, sv<t0>_t)
+ void svfoo[_u64base]_offset[_t0](svuint64_t, int64_t, sv<t0>_t)
+
+ i.e. a version of store_scatter_offset that doesn't support svint32_t
+ offsets. */
+struct store_scatter_offset_restricted_def : public store_scatter_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ b.add_overloaded_functions (group, MODE_offset);
+ build_sv_uint_offset (b, "_,as,d,t0", group);
+ build_v_base (b, "_,b,t0", group);
+ build_vs_offset (b, "_,b,ss64,t0", group);
+ }
+};
+SHAPE (store_scatter_offset_restricted)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>xN_t, sv<t0:uint>_t). */
+struct tbl_tuple_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,t0,vu0", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (2, i, nargs)
+ || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES
+ || !r.require_derived_vector_type (i + 1, i, type, TYPE_unsigned))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+SHAPE (tbl_tuple)
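svtbl2 (implemented later in this patch) is the main user of this shape; a short sketch of a two-vector table lookup, with an invented wrapper name and an SVE2 target assumed:

  #include <arm_sve.h>

  /* Index into the concatenation of A and B; out-of-range indices
     select zero.  */
  svint32_t
  lookup2 (svint32_t a, svint32_t b, svuint32_t idx)
  {
    svint32x2_t table = svcreate2_s32 (a, b);
    return svtbl2_s32 (table, idx);
  }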
+
/* svbool_t svfoo[_<t0>](sv<t0>_t, sv<t0>_t, sv<t0>_t, uint64_t)
where the final argument is an integer constant expression in the
@@ -1913,6 +2652,47 @@ struct ternary_lane_rotate_def : public overloaded_base<0>
};
SHAPE (ternary_lane_rotate)
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:half>_t, sv<t0:half>_t, uint64_t)
+
+ where the final argument is an integer constant expression in the range
+ [0, 32 / sizeof (<t0>_t) - 1]. */
+struct ternary_long_lane_def
+ : public ternary_resize2_lane_base<function_resolver::HALF_SIZE>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vh0,vh0,su64", group, MODE_none);
+ }
+
+ bool
+ check (function_checker &c) const OVERRIDE
+ {
+ return c.require_immediate_lane_index (3);
+ }
+};
+SHAPE (ternary_long_lane)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:half>_t, sv<t0:half>_t)
+ sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0:half>_t, <t0:half>_t)
+
+ i.e. a version of the standard ternary shape ternary_opt_n in which
+ the element type of the last two arguments is the half-sized
+ equivalent of <t0>. */
+struct ternary_long_opt_n_def
+ : public ternary_resize2_opt_n_base<function_resolver::HALF_SIZE>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vh0,vh0", group, MODE_none);
+ build_all (b, "v0,v0,vh0,sh0", group, MODE_n);
+ }
+};
+SHAPE (ternary_long_opt_n)
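svabalb (declared later in this patch) is one intrinsic with this shape; a rough sketch (invented wrapper name, SVE2 target assumed):

  #include <arm_sve.h>

  /* Accumulate the absolute differences of the even 8-bit lanes of B and C
     into the 16-bit lanes of ACC; the _n form would take a scalar int8_t
     as the final operand.  */
  svint16_t
  abd_accumulate (svint16_t acc, svint8_t b, svint8_t c)
  {
    return svabalb_s16 (acc, b, c);
  }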
+
/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t, sv<t0>_t)
sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0>_t, <t0>_t)
@@ -1940,7 +2720,8 @@ SHAPE (ternary_opt_n)
where the final argument is an integer constant expression in the range
[0, 16 / sizeof (<t0>_t) - 1]. */
-struct ternary_qq_lane_def : public overloaded_base<0>
+struct ternary_qq_lane_def
+ : public ternary_resize2_lane_base<function_resolver::QUARTER_SIZE>
{
void
build (function_builder &b, const function_group_info &group) const OVERRIDE
@@ -1949,18 +2730,41 @@ struct ternary_qq_lane_def : public overloaded_base<0>
build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none);
}
+ bool
+ check (function_checker &c) const OVERRIDE
+ {
+ return c.require_immediate_lane_index (3, 4);
+ }
+};
+SHAPE (ternary_qq_lane)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t,
+ uint64_t)
+
+ where the final argument is an integer constant expression in
+ {0, 90, 180, 270}. */
+struct ternary_qq_lane_rotate_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vq0,vq0,su64,su64", group, MODE_none);
+ }
+
tree
resolve (function_resolver &r) const OVERRIDE
{
unsigned int i, nargs;
type_suffix_index type;
- if (!r.check_gp_argument (4, i, nargs)
+ if (!r.check_gp_argument (5, i, nargs)
|| (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
|| !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS,
r.QUARTER_SIZE)
|| !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS,
r.QUARTER_SIZE)
- || !r.require_integer_immediate (i + 3))
+ || !r.require_integer_immediate (i + 3)
+ || !r.require_integer_immediate (i + 4))
return error_mark_node;
return r.resolve_to (r.mode_suffix_id, type);
@@ -1969,10 +2773,11 @@ struct ternary_qq_lane_def : public overloaded_base<0>
bool
check (function_checker &c) const OVERRIDE
{
- return c.require_immediate_lane_index (3, 4);
+ return (c.require_immediate_lane_index (3, 4)
+ && c.require_immediate_one_of (4, 0, 90, 180, 270));
}
};
-SHAPE (ternary_qq_lane)
+SHAPE (ternary_qq_lane_rotate)
/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t)
 sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0:quarter>_t, <t0:quarter>_t)
@@ -1980,7 +2785,8 @@ SHAPE (ternary_qq_lane)
i.e. a version of the standard ternary shape ternary_opt_n in which
the element type of the last two arguments is the quarter-sized
equivalent of <t0>. */
-struct ternary_qq_opt_n_def : public overloaded_base<0>
+struct ternary_qq_opt_n_def
+ : public ternary_resize2_opt_n_base<function_resolver::QUARTER_SIZE>
{
void
build (function_builder &b, const function_group_info &group) const OVERRIDE
@@ -1989,23 +2795,47 @@ struct ternary_qq_opt_n_def : public overloaded_base<0>
build_all (b, "v0,v0,vq0,vq0", group, MODE_none);
build_all (b, "v0,v0,vq0,sq0", group, MODE_n);
}
+};
+SHAPE (ternary_qq_opt_n)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t,
+ uint64_t)
+
+ where the final argument is an integer constant expression in
+ {0, 90, 180, 270}. */
+struct ternary_qq_rotate_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none);
+ }
tree
resolve (function_resolver &r) const OVERRIDE
{
unsigned int i, nargs;
type_suffix_index type;
- if (!r.check_gp_argument (3, i, nargs)
+ if (!r.check_gp_argument (4, i, nargs)
|| (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
|| !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS,
- r.QUARTER_SIZE))
+ r.QUARTER_SIZE)
+ || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS,
+ r.QUARTER_SIZE)
+ || !r.require_integer_immediate (i + 3))
return error_mark_node;
- return r.finish_opt_n_resolution (i + 2, i, type, r.SAME_TYPE_CLASS,
- r.QUARTER_SIZE);
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+
+ bool
+ check (function_checker &c) const OVERRIDE
+ {
+ return c.require_immediate_one_of (3, 0, 90, 180, 270);
}
};
-SHAPE (ternary_qq_opt_n)
+SHAPE (ternary_qq_rotate)
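svcdot (declared later in this patch) is resolved through this shape; a minimal sketch (invented wrapper name, SVE2 target assumed) of the s32 instance, where the rotation must be one of 0, 90, 180 or 270:

  #include <arm_sve.h>

  /* Complex integer dot product of quarter-width (8-bit) elements,
     accumulated into 32-bit lanes, with a rotation of 90.  */
  svint32_t
  cdot_rot90 (svint32_t acc, svint8_t a, svint8_t b)
  {
    return svcdot_s32 (acc, a, b, 90);
  }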
/* svbool_t svfoo[_<t0>](sv<t0>_t, sv<t0>_t, sv<t0>_t, uint64_t)
@@ -2034,6 +2864,62 @@ struct ternary_rotate_def : public overloaded_base<0>
};
SHAPE (ternary_rotate)
+/* sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0>_t, uint64_t)
+
+ where the final argument must be an integer constant expression in the
+ range [0, sizeof (<t0>_t) * 8 - 1]. */
+struct ternary_shift_left_imm_def : public ternary_shift_imm_base
+{
+ bool
+ check (function_checker &c) const OVERRIDE
+ {
+ unsigned int bits = c.type_suffix (0).element_bits;
+ return c.require_immediate_range (2, 0, bits - 1);
+ }
+};
+SHAPE (ternary_shift_left_imm)
+
+/* sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0>_t, uint64_t)
+
+ where the final argument must be an integer constant expression in the
+ range [1, sizeof (<t0>_t) * 8]. */
+struct ternary_shift_right_imm_def : public ternary_shift_imm_base
+{
+ bool
+ check (function_checker &c) const OVERRIDE
+ {
+ unsigned int bits = c.type_suffix (0).element_bits;
+ return c.require_immediate_range (2, 1, bits);
+ }
+};
+SHAPE (ternary_shift_right_imm)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t, sv<t0:uint>_t). */
+struct ternary_uint_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,v0,vu0", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (3, i, nargs)
+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+ || !r.require_matching_vector_type (i + 1, type)
+ || !r.require_derived_vector_type (i + 2, i, type, TYPE_unsigned))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+SHAPE (ternary_uint)
+
/* svbool_t svfoo[_<t0>](sv<t0>_t, sv<t0>_t, uint64_t)
where the final argument is an integer constant expression in the
@@ -2082,7 +2968,7 @@ struct unary_def : public overloaded_base<0>
};
SHAPE (unary)
-/* sv<t0>_t svfoo_t0[_t1](svbool_t, sv<t1>_t)
+/* sv<t0>_t svfoo_t0[_t1](sv<t1>_t)
where the target type <t0> must be specified explicitly but the source
type <t1> can be inferred. */
@@ -2104,6 +2990,57 @@ struct unary_convert_def : public overloaded_base<1>
};
SHAPE (unary_convert)
+/* sv<t0>_t svfoo_t0[_t1](sv<t0>_t, sv<t1>_t)
+
+ This is a version of unary_convert in which the even-indexed
+ elements are passed in as a first parameter, before any governing
+ predicate. */
+struct unary_convert_narrowt_def : public overloaded_base<1>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v1", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ return r.resolve_unary (r.type_suffix (0).tclass,
+ r.type_suffix (0).element_bits, true);
+ }
+};
+SHAPE (unary_convert_narrowt)
+
+/* sv<t0>_t svfoo[_t0](sv<t0:half>_t). */
+struct unary_long_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,vh0", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type, result_type;
+ if (!r.check_gp_argument (1, i, nargs)
+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+ || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ if (tree res = r.lookup_form (r.mode_suffix_id, result_type))
+ return res;
+
+ return r.report_no_such_form (type);
+ }
+};
+SHAPE (unary_long)
+
/* sv<t0>_t svfoo[_n]_t0(<t0>_t). */
struct unary_n_def : public overloaded_base<1>
{
@@ -2124,6 +3061,22 @@ struct unary_n_def : public overloaded_base<1>
};
SHAPE (unary_n)
+/* sv<t0:half>_t svfoo[_t0](sv<t0>_t). */
+typedef unary_narrowb_base<> unary_narrowb_def;
+SHAPE (unary_narrowb)
+
+/* sv<t0:half>_t svfoo[_t0](sv<t0:half>_t, sv<t0>_t). */
+typedef unary_narrowt_base<> unary_narrowt_def;
+SHAPE (unary_narrowt)
+
+/* sv<t0:uint:half>_t svfoo[_t0](sv<t0>_t). */
+typedef unary_narrowb_base<TYPE_unsigned> unary_narrowb_to_uint_def;
+SHAPE (unary_narrowb_to_uint)
+
+/* sv<t0:uint:half>_t svfoo[_t0](sv<t0:uint:half>_t, sv<t0>_t). */
+typedef unary_narrowt_base<TYPE_unsigned> unary_narrowt_to_uint_def;
+SHAPE (unary_narrowt_to_uint)
+
/* svbool_t svfoo(svbool_t). */
struct unary_pred_def : public nonoverloaded_base
{
@@ -2135,6 +3088,27 @@ struct unary_pred_def : public nonoverloaded_base
};
SHAPE (unary_pred)
+/* sv<t0:int>_t svfoo[_t0](sv<t0>_t)
+
+ i.e. a version of "unary" in which the returned vector contains
+ signed integers. */
+struct unary_to_int_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "vs0,v0", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ return r.resolve_unary (TYPE_signed);
+ }
+};
+SHAPE (unary_to_int)
+
/* sv<t0:uint>_t svfoo[_t0](sv<t0>_t)
i.e. a version of "unary" in which the returned vector contains
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
index 15137128..2a75a82 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
@@ -76,18 +76,27 @@ namespace aarch64_sve
extern const function_shape *const binary;
extern const function_shape *const binary_int_opt_n;
extern const function_shape *const binary_lane;
+ extern const function_shape *const binary_long_lane;
+ extern const function_shape *const binary_long_opt_n;
extern const function_shape *const binary_n;
+ extern const function_shape *const binary_narrowb_opt_n;
+ extern const function_shape *const binary_narrowt_opt_n;
extern const function_shape *const binary_opt_n;
extern const function_shape *const binary_pred;
extern const function_shape *const binary_rotate;
extern const function_shape *const binary_scalar;
+ extern const function_shape *const binary_to_uint;
extern const function_shape *const binary_uint;
extern const function_shape *const binary_uint_n;
extern const function_shape *const binary_uint_opt_n;
extern const function_shape *const binary_uint64_n;
extern const function_shape *const binary_uint64_opt_n;
+ extern const function_shape *const binary_wide;
+ extern const function_shape *const binary_wide_opt_n;
extern const function_shape *const clast;
+ extern const function_shape *const compare;
extern const function_shape *const compare_opt_n;
+ extern const function_shape *const compare_ptr;
extern const function_shape *const compare_scalar;
extern const function_shape *const compare_wide_opt_n;
extern const function_shape *const count_inherent;
@@ -108,8 +117,11 @@ namespace aarch64_sve
extern const function_shape *const load;
extern const function_shape *const load_ext;
extern const function_shape *const load_ext_gather_index;
+ extern const function_shape *const load_ext_gather_index_restricted;
extern const function_shape *const load_ext_gather_offset;
+ extern const function_shape *const load_ext_gather_offset_restricted;
extern const function_shape *const load_gather_sv;
+ extern const function_shape *const load_gather_sv_restricted;
extern const function_shape *const load_gather_vs;
extern const function_shape *const load_replicate;
extern const function_shape *const pattern_pred;
@@ -122,21 +134,44 @@ namespace aarch64_sve
extern const function_shape *const reduction_wide;
extern const function_shape *const set;
extern const function_shape *const setffr;
+ extern const function_shape *const shift_left_imm_long;
+ extern const function_shape *const shift_left_imm_to_uint;
extern const function_shape *const shift_right_imm;
+ extern const function_shape *const shift_right_imm_narrowb;
+ extern const function_shape *const shift_right_imm_narrowt;
+ extern const function_shape *const shift_right_imm_narrowb_to_uint;
+ extern const function_shape *const shift_right_imm_narrowt_to_uint;
extern const function_shape *const store;
extern const function_shape *const store_scatter_index;
+ extern const function_shape *const store_scatter_index_restricted;
extern const function_shape *const store_scatter_offset;
+ extern const function_shape *const store_scatter_offset_restricted;
+ extern const function_shape *const tbl_tuple;
extern const function_shape *const ternary_lane;
extern const function_shape *const ternary_lane_rotate;
+ extern const function_shape *const ternary_long_lane;
+ extern const function_shape *const ternary_long_opt_n;
extern const function_shape *const ternary_opt_n;
extern const function_shape *const ternary_qq_lane;
+ extern const function_shape *const ternary_qq_lane_rotate;
extern const function_shape *const ternary_qq_opt_n;
+ extern const function_shape *const ternary_qq_rotate;
extern const function_shape *const ternary_rotate;
+ extern const function_shape *const ternary_shift_left_imm;
+ extern const function_shape *const ternary_shift_right_imm;
+ extern const function_shape *const ternary_uint;
extern const function_shape *const tmad;
extern const function_shape *const unary;
extern const function_shape *const unary_convert;
+ extern const function_shape *const unary_convert_narrowt;
+ extern const function_shape *const unary_long;
extern const function_shape *const unary_n;
+ extern const function_shape *const unary_narrowb;
+ extern const function_shape *const unary_narrowt;
+ extern const function_shape *const unary_narrowb_to_uint;
+ extern const function_shape *const unary_narrowt_to_uint;
extern const function_shape *const unary_pred;
+ extern const function_shape *const unary_to_int;
extern const function_shape *const unary_to_uint;
extern const function_shape *const unary_uint;
extern const function_shape *const unary_widen;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
new file mode 100644
index 0000000..fa3b506
--- /dev/null
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -0,0 +1,654 @@
+/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE2 intrinsics)
+ Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "memmodel.h"
+#include "insn-codes.h"
+#include "optabs.h"
+#include "recog.h"
+#include "expr.h"
+#include "basic-block.h"
+#include "function.h"
+#include "fold-const.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "gimplify.h"
+#include "explow.h"
+#include "emit-rtl.h"
+#include "tree-vector-builder.h"
+#include "rtx-vector-builder.h"
+#include "vec-perm-indices.h"
+#include "aarch64-sve-builtins.h"
+#include "aarch64-sve-builtins-shapes.h"
+#include "aarch64-sve-builtins-base.h"
+#include "aarch64-sve-builtins-sve2.h"
+#include "aarch64-sve-builtins-functions.h"
+
+using namespace aarch64_sve;
+
+namespace {
+
+/* Return the UNSPEC_CDOT* unspec for rotation amount ROT. */
+static int
+unspec_cdot (int rot)
+{
+ switch (rot)
+ {
+ case 0: return UNSPEC_CDOT;
+ case 90: return UNSPEC_CDOT90;
+ case 180: return UNSPEC_CDOT180;
+ case 270: return UNSPEC_CDOT270;
+ default: gcc_unreachable ();
+ }
+}
+
+/* Return the UNSPEC_SQRDCMLAH* unspec for rotation amount ROT. */
+static int
+unspec_sqrdcmlah (int rot)
+{
+ switch (rot)
+ {
+ case 0: return UNSPEC_SQRDCMLAH;
+ case 90: return UNSPEC_SQRDCMLAH90;
+ case 180: return UNSPEC_SQRDCMLAH180;
+ case 270: return UNSPEC_SQRDCMLAH270;
+ default: gcc_unreachable ();
+ }
+}
+
+class svaba_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX;
+ machine_mode mode = e.vector_mode (0);
+ return e.use_exact_insn (code_for_aarch64_sve2_aba (max_code, mode));
+ }
+};
+
+class svcdot_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ /* Convert the rotation amount into a specific unspec. */
+ int rot = INTVAL (e.args.pop ());
+ return e.use_exact_insn (code_for_aarch64_sve (unspec_cdot (rot),
+ e.vector_mode (0)));
+ }
+};
+
+class svcdot_lane_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ /* Convert the rotation amount into a specific unspec. */
+ int rot = INTVAL (e.args.pop ());
+ return e.use_exact_insn (code_for_aarch64_lane (unspec_cdot (rot),
+ e.vector_mode (0)));
+ }
+};
+
+class svldnt1_gather_impl : public full_width_access
+{
+public:
+ unsigned int
+ call_properties (const function_instance &) const OVERRIDE
+ {
+ return CP_READ_MEMORY;
+ }
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ e.prepare_gather_address_operands (1, false);
+ machine_mode mem_mode = e.memory_vector_mode ();
+ return e.use_exact_insn (code_for_aarch64_gather_ldnt (mem_mode));
+ }
+};
+
+/* Implements extending forms of svldnt1_gather. */
+class svldnt1_gather_extend_impl : public extending_load
+{
+public:
+ CONSTEXPR svldnt1_gather_extend_impl (type_suffix_index memory_type)
+ : extending_load (memory_type) {}
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ e.prepare_gather_address_operands (1, false);
+ /* Add a constant predicate for the extension rtx. */
+ e.args.quick_push (CONSTM1_RTX (VNx16BImode));
+ insn_code icode = code_for_aarch64_gather_ldnt (extend_rtx_code (),
+ e.vector_mode (0),
+ e.memory_vector_mode ());
+ return e.use_exact_insn (icode);
+ }
+};
+
+/* Implements both svmatch and svnmatch; the unspec parameter decides
+ between them. */
+class svmatch_svnmatch_impl : public function_base
+{
+public:
+ CONSTEXPR svmatch_svnmatch_impl (int unspec) : m_unspec (unspec) {}
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ /* These are UNSPEC_PRED_Z operations and so need a hint operand. */
+ e.add_ptrue_hint (0, e.gp_mode (0));
+ return e.use_exact_insn (code_for_aarch64_pred (m_unspec,
+ e.vector_mode (0)));
+ }
+
+ int m_unspec;
+};
+
+/* Implements both svmovlb and svmovlt; the unspec parameters decide
+ between them. */
+class svmovl_lb_impl : public unspec_based_function_base
+{
+public:
+ CONSTEXPR svmovl_lb_impl (int unspec_for_sint, int unspec_for_uint,
+ int unspec_for_fp)
+ : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
+ unspec_for_fp)
+ {}
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ e.args.quick_push (const0_rtx);
+ return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint,
+ m_unspec_for_fp);
+ }
+};
+
+class svqcadd_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ /* Convert the rotation amount into a specific unspec. */
+ int rot = INTVAL (e.args.pop ());
+ if (rot == 90)
+ return e.map_to_unspecs (UNSPEC_SQCADD90, -1, -1);
+ if (rot == 270)
+ return e.map_to_unspecs (UNSPEC_SQCADD270, -1, -1);
+ gcc_unreachable ();
+ }
+};
+
+class svqrdcmlah_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ /* Convert the rotation amount into a specific unspec. */
+ int rot = INTVAL (e.args.pop ());
+ return e.use_exact_insn (code_for_aarch64_sve (unspec_sqrdcmlah (rot),
+ e.vector_mode (0)));
+ }
+};
+
+class svqrdcmlah_lane_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ /* Convert the rotation amount into a specific unspec. */
+ int rot = INTVAL (e.args.pop ());
+ return e.use_exact_insn (code_for_aarch64_lane (unspec_sqrdcmlah (rot),
+ e.vector_mode (0)));
+ }
+};
+
+class svqrshl_impl : public unspec_based_function
+{
+public:
+ CONSTEXPR svqrshl_impl ()
+ : unspec_based_function (UNSPEC_SQRSHL, UNSPEC_UQRSHL, -1) {}
+
+ gimple *
+ fold (gimple_folder &f) const OVERRIDE
+ {
+ if (tree amount = uniform_integer_cst_p (gimple_call_arg (f.call, 2)))
+ {
+ if (wi::to_widest (amount) >= 0)
+ {
+ /* The rounding has no effect, and [SU]QSHL has immediate forms
+ that we can use for sensible shift amounts. */
+ function_instance instance ("svqshl", functions::svqshl,
+ shapes::binary_int_opt_n, MODE_n,
+ f.type_suffix_ids, f.pred);
+ return f.redirect_call (instance);
+ }
+ else
+ {
+ /* The saturation has no effect, and [SU]RSHL has immediate forms
+ that we can use for sensible shift amounts. */
+ function_instance instance ("svrshl", functions::svrshl,
+ shapes::binary_int_opt_n, MODE_n,
+ f.type_suffix_ids, f.pred);
+ return f.redirect_call (instance);
+ }
+ }
+ return NULL;
+ }
+};
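To show the intended effect of this fold from the user's side (a hedged sketch, with an invented wrapper name and an SVE2 target assumed): a call with a non-negative constant amount should end up going down the svqshl path, since rounding cannot change a left shift, while a negative constant is redirected towards svrshl (and from there possibly svrshr):

  #include <arm_sve.h>

  /* Expected to fold to the equivalent svqshl_n_s32_x call.  */
  svint32_t
  qrshl_by_4 (svbool_t pg, svint32_t x)
  {
    return svqrshl_n_s32_x (pg, x, 4);
  }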
+
+class svqshl_impl : public unspec_based_function
+{
+public:
+ CONSTEXPR svqshl_impl ()
+ : unspec_based_function (UNSPEC_SQSHL, UNSPEC_UQSHL, -1) {}
+
+ gimple *
+ fold (gimple_folder &f) const OVERRIDE
+ {
+ if (tree amount = uniform_integer_cst_p (gimple_call_arg (f.call, 2)))
+ {
+ int element_bits = f.type_suffix (0).element_bits;
+ if (wi::to_widest (amount) >= -element_bits
+ && wi::to_widest (amount) < 0)
+ {
+ /* The saturation has no effect for right shifts, so we can
+ use the immediate form of ASR or LSR. */
+ amount = wide_int_to_tree (TREE_TYPE (amount),
+ -wi::to_wide (amount));
+ function_instance instance ("svasr", functions::svasr,
+ shapes::binary_uint_opt_n, MODE_n,
+ f.type_suffix_ids, f.pred);
+ if (f.type_suffix (0).unsigned_p)
+ {
+ instance.base_name = "svlsr";
+ instance.base = functions::svlsr;
+ }
+ gcall *call = as_a <gcall *> (f.redirect_call (instance));
+ gimple_call_set_arg (call, 2, amount);
+ return call;
+ }
+ }
+ return NULL;
+ }
+};
+
+class svrshl_impl : public unspec_based_function
+{
+public:
+ CONSTEXPR svrshl_impl ()
+ : unspec_based_function (UNSPEC_SRSHL, UNSPEC_URSHL, -1) {}
+
+ gimple *
+ fold (gimple_folder &f) const OVERRIDE
+ {
+ if (tree amount = uniform_integer_cst_p (gimple_call_arg (f.call, 2)))
+ {
+ if (wi::to_widest (amount) >= 0)
+ {
+ /* The rounding has no effect, and LSL has immediate forms
+ that we can use for sensible shift amounts. */
+ function_instance instance ("svlsl", functions::svlsl,
+ shapes::binary_uint_opt_n, MODE_n,
+ f.type_suffix_ids, f.pred);
+ gcall *call = as_a <gcall *> (f.redirect_call (instance));
+ gimple_call_set_arg (call, 2, amount);
+ return call;
+ }
+ int element_bits = f.type_suffix (0).element_bits;
+ if (wi::to_widest (amount) >= -element_bits)
+ {
+ /* The shift amount is within the range of [SU]RSHR. */
+ amount = wide_int_to_tree (TREE_TYPE (amount),
+ -wi::to_wide (amount));
+ function_instance instance ("svrshr", functions::svrshr,
+ shapes::shift_right_imm, MODE_n,
+ f.type_suffix_ids, f.pred);
+ gcall *call = as_a <gcall *> (f.redirect_call (instance));
+ gimple_call_set_arg (call, 2, amount);
+ return call;
+ }
+ }
+ return NULL;
+ }
+};
+
+class svsqadd_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ machine_mode mode = e.vector_mode (0);
+ if (e.pred == PRED_x
+ && aarch64_sve_sqadd_sqsub_immediate_p (mode, e.args[2], false))
+ return e.map_to_rtx_codes (UNKNOWN, US_PLUS, -1);
+ return e.map_to_unspecs (-1, UNSPEC_USQADD, -1);
+ }
+};
+
+class svsra_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ rtx_code shift_code = e.type_suffix (0).unsigned_p ? LSHIFTRT : ASHIFTRT;
+ machine_mode mode = e.vector_mode (0);
+ return e.use_exact_insn (code_for_aarch64_sve_add (shift_code, mode));
+ }
+};
+
+class svstnt1_scatter_impl : public full_width_access
+{
+public:
+ unsigned int
+ call_properties (const function_instance &) const OVERRIDE
+ {
+ return CP_WRITE_MEMORY;
+ }
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ e.prepare_gather_address_operands (1, false);
+ machine_mode mem_mode = e.memory_vector_mode ();
+ return e.use_exact_insn (code_for_aarch64_scatter_stnt (mem_mode));
+ }
+};
+
+/* Implements truncating forms of svstnt1_scatter. */
+class svstnt1_scatter_truncate_impl : public truncating_store
+{
+public:
+ CONSTEXPR svstnt1_scatter_truncate_impl (scalar_int_mode to_mode)
+ : truncating_store (to_mode) {}
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ e.prepare_gather_address_operands (1, false);
+ insn_code icode = code_for_aarch64_scatter_stnt (e.vector_mode (0),
+ e.memory_vector_mode ());
+ return e.use_exact_insn (icode);
+ }
+};
+
+class svtbl2_impl : public quiet<multi_vector_function>
+{
+public:
+ CONSTEXPR svtbl2_impl () : quiet<multi_vector_function> (2) {}
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ return e.use_exact_insn (code_for_aarch64_sve2_tbl2 (e.vector_mode (0)));
+ }
+};
+
+class svuqadd_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ machine_mode mode = e.vector_mode (0);
+ if (e.pred == PRED_x
+ && aarch64_sve_arith_immediate_p (mode, e.args[2], false))
+ return e.use_unpred_insn (code_for_aarch64_sve_suqadd_const (mode));
+ return e.map_to_unspecs (UNSPEC_SUQADD, -1, -1);
+ }
+};
+
+/* Implements both svwhilerw and svwhilewr; the unspec parameter decides
+ between them. */
+class svwhilerw_svwhilewr_impl : public full_width_access
+{
+public:
+ CONSTEXPR svwhilerw_svwhilewr_impl (int unspec) : m_unspec (unspec) {}
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ return e.use_exact_insn (code_for_while (m_unspec, Pmode, e.gp_mode (0)));
+ }
+
+ int m_unspec;
+};
+
+} /* end anonymous namespace */
+
+namespace aarch64_sve {
+
+FUNCTION (svaba, svaba_impl,)
+FUNCTION (svabalb, unspec_based_add_function, (UNSPEC_SABDLB,
+ UNSPEC_UABDLB, -1))
+FUNCTION (svabalt, unspec_based_add_function, (UNSPEC_SABDLT,
+ UNSPEC_UABDLT, -1))
+FUNCTION (svadclb, unspec_based_function, (-1, UNSPEC_ADCLB, -1))
+FUNCTION (svadclt, unspec_based_function, (-1, UNSPEC_ADCLT, -1))
+FUNCTION (svaddhnb, unspec_based_function, (UNSPEC_ADDHNB, UNSPEC_ADDHNB, -1))
+FUNCTION (svaddhnt, unspec_based_function, (UNSPEC_ADDHNT, UNSPEC_ADDHNT, -1))
+FUNCTION (svabdlb, unspec_based_function, (UNSPEC_SABDLB, UNSPEC_UABDLB, -1))
+FUNCTION (svabdlt, unspec_based_function, (UNSPEC_SABDLT, UNSPEC_UABDLT, -1))
+FUNCTION (svadalp, unspec_based_function, (UNSPEC_SADALP, UNSPEC_UADALP, -1))
+FUNCTION (svaddlb, unspec_based_function, (UNSPEC_SADDLB, UNSPEC_UADDLB, -1))
+FUNCTION (svaddlbt, unspec_based_function, (UNSPEC_SADDLBT, -1, -1))
+FUNCTION (svaddlt, unspec_based_function, (UNSPEC_SADDLT, UNSPEC_UADDLT, -1))
+FUNCTION (svaddwb, unspec_based_function, (UNSPEC_SADDWB, UNSPEC_UADDWB, -1))
+FUNCTION (svaddwt, unspec_based_function, (UNSPEC_SADDWT, UNSPEC_UADDWT, -1))
+FUNCTION (svaddp, unspec_based_pred_function, (UNSPEC_ADDP, UNSPEC_ADDP,
+ UNSPEC_FADDP))
+FUNCTION (svaesd, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesd))
+FUNCTION (svaese, fixed_insn_function, (CODE_FOR_aarch64_sve2_aese))
+FUNCTION (svaesimc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesimc))
+FUNCTION (svaesmc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesmc))
+FUNCTION (svbcax, CODE_FOR_MODE0 (aarch64_sve2_bcax),)
+FUNCTION (svbdep, unspec_based_function, (UNSPEC_BDEP, UNSPEC_BDEP, -1))
+FUNCTION (svbext, unspec_based_function, (UNSPEC_BEXT, UNSPEC_BEXT, -1))
+FUNCTION (svbgrp, unspec_based_function, (UNSPEC_BGRP, UNSPEC_BGRP, -1))
+FUNCTION (svbsl, CODE_FOR_MODE0 (aarch64_sve2_bsl),)
+FUNCTION (svbsl1n, CODE_FOR_MODE0 (aarch64_sve2_bsl1n),)
+FUNCTION (svbsl2n, CODE_FOR_MODE0 (aarch64_sve2_bsl2n),)
+FUNCTION (svcdot, svcdot_impl,)
+FUNCTION (svcdot_lane, svcdot_lane_impl,)
+FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT))
+FUNCTION (svcvtnt, CODE_FOR_MODE1 (aarch64_sve2_cvtnt),)
+FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX))
+FUNCTION (svcvtxnt, CODE_FOR_MODE1 (aarch64_sve2_cvtxnt),)
+FUNCTION (sveor3, CODE_FOR_MODE0 (aarch64_sve2_eor3),)
+FUNCTION (sveorbt, unspec_based_function, (UNSPEC_EORBT, UNSPEC_EORBT, -1))
+FUNCTION (sveortb, unspec_based_function, (UNSPEC_EORTB, UNSPEC_EORTB, -1))
+FUNCTION (svhadd, unspec_based_function, (UNSPEC_SHADD, UNSPEC_UHADD, -1))
+FUNCTION (svhsub, unspec_based_function, (UNSPEC_SHSUB, UNSPEC_UHSUB, -1))
+FUNCTION (svhistcnt, CODE_FOR_MODE0 (aarch64_sve2_histcnt),)
+FUNCTION (svhistseg, CODE_FOR_MODE0 (aarch64_sve2_histseg),)
+FUNCTION (svhsubr, unspec_based_function_rotated, (UNSPEC_SHSUB,
+ UNSPEC_UHSUB, -1))
+FUNCTION (svldnt1_gather, svldnt1_gather_impl,)
+FUNCTION (svldnt1sb_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_s8))
+FUNCTION (svldnt1sh_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_s16))
+FUNCTION (svldnt1sw_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_s32))
+FUNCTION (svldnt1ub_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_u8))
+FUNCTION (svldnt1uh_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_u16))
+FUNCTION (svldnt1uw_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_u32))
+FUNCTION (svlogb, unspec_based_function, (-1, -1, UNSPEC_COND_FLOGB))
+FUNCTION (svmatch, svmatch_svnmatch_impl, (UNSPEC_MATCH))
+FUNCTION (svmaxp, unspec_based_pred_function, (UNSPEC_SMAXP, UNSPEC_UMAXP,
+ UNSPEC_FMAXP))
+FUNCTION (svmaxnmp, unspec_based_pred_function, (-1, -1, UNSPEC_FMAXNMP))
+FUNCTION (svminp, unspec_based_pred_function, (UNSPEC_SMINP, UNSPEC_UMINP,
+ UNSPEC_FMINP))
+FUNCTION (svminnmp, unspec_based_pred_function, (-1, -1, UNSPEC_FMINNMP))
+FUNCTION (svmlalb, unspec_based_mla_function, (UNSPEC_SMULLB,
+ UNSPEC_UMULLB, UNSPEC_FMLALB))
+FUNCTION (svmlalb_lane, unspec_based_mla_lane_function, (UNSPEC_SMULLB,
+ UNSPEC_UMULLB,
+ UNSPEC_FMLALB))
+FUNCTION (svmlalt, unspec_based_mla_function, (UNSPEC_SMULLT,
+ UNSPEC_UMULLT, UNSPEC_FMLALT))
+FUNCTION (svmlalt_lane, unspec_based_mla_lane_function, (UNSPEC_SMULLT,
+ UNSPEC_UMULLT,
+ UNSPEC_FMLALT))
+FUNCTION (svmlslb, unspec_based_mls_function, (UNSPEC_SMULLB,
+ UNSPEC_UMULLB, UNSPEC_FMLSLB))
+FUNCTION (svmlslb_lane, unspec_based_mls_lane_function, (UNSPEC_SMULLB,
+ UNSPEC_UMULLB,
+ UNSPEC_FMLSLB))
+FUNCTION (svmlslt, unspec_based_mls_function, (UNSPEC_SMULLT,
+ UNSPEC_UMULLT, UNSPEC_FMLSLT))
+FUNCTION (svmlslt_lane, unspec_based_mls_lane_function, (UNSPEC_SMULLT,
+ UNSPEC_UMULLT,
+ UNSPEC_FMLSLT))
+FUNCTION (svmovlb, svmovl_lb_impl, (UNSPEC_SSHLLB, UNSPEC_USHLLB, -1))
+FUNCTION (svmovlt, svmovl_lb_impl, (UNSPEC_SSHLLT, UNSPEC_USHLLT, -1))
+FUNCTION (svmullb, unspec_based_function, (UNSPEC_SMULLB, UNSPEC_UMULLB, -1))
+FUNCTION (svmullb_lane, unspec_based_lane_function, (UNSPEC_SMULLB,
+ UNSPEC_UMULLB, -1))
+FUNCTION (svmullt, unspec_based_function, (UNSPEC_SMULLT, UNSPEC_UMULLT, -1))
+FUNCTION (svmullt_lane, unspec_based_lane_function, (UNSPEC_SMULLT,
+ UNSPEC_UMULLT, -1))
+FUNCTION (svnbsl, CODE_FOR_MODE0 (aarch64_sve2_nbsl),)
+FUNCTION (svnmatch, svmatch_svnmatch_impl, (UNSPEC_NMATCH))
+FUNCTION (svpmul, CODE_FOR_MODE0 (aarch64_sve2_pmul),)
+FUNCTION (svpmullb, unspec_based_function, (-1, UNSPEC_PMULLB, -1))
+FUNCTION (svpmullb_pair, unspec_based_function, (-1, UNSPEC_PMULLB_PAIR, -1))
+FUNCTION (svpmullt, unspec_based_function, (-1, UNSPEC_PMULLT, -1))
+FUNCTION (svpmullt_pair, unspec_based_function, (-1, UNSPEC_PMULLT_PAIR, -1))
+FUNCTION (svqabs, rtx_code_function, (SS_ABS, UNKNOWN, UNKNOWN))
+FUNCTION (svqcadd, svqcadd_impl,)
+FUNCTION (svqdmlalb, unspec_based_qadd_function, (UNSPEC_SQDMULLB, -1, -1))
+FUNCTION (svqdmlalb_lane, unspec_based_qadd_lane_function, (UNSPEC_SQDMULLB,
+ -1, -1))
+FUNCTION (svqdmlalbt, unspec_based_qadd_function, (UNSPEC_SQDMULLBT, -1, -1))
+FUNCTION (svqdmlalt, unspec_based_qadd_function, (UNSPEC_SQDMULLT, -1, -1))
+FUNCTION (svqdmlalt_lane, unspec_based_qadd_lane_function, (UNSPEC_SQDMULLT,
+ -1, -1))
+FUNCTION (svqdmlslb, unspec_based_qsub_function, (UNSPEC_SQDMULLB, -1, -1))
+FUNCTION (svqdmlslb_lane, unspec_based_qsub_lane_function, (UNSPEC_SQDMULLB,
+ -1, -1))
+FUNCTION (svqdmlslbt, unspec_based_qsub_function, (UNSPEC_SQDMULLBT, -1, -1))
+FUNCTION (svqdmlslt, unspec_based_qsub_function, (UNSPEC_SQDMULLT, -1, -1))
+FUNCTION (svqdmlslt_lane, unspec_based_qsub_lane_function, (UNSPEC_SQDMULLT,
+ -1, -1))
+FUNCTION (svqdmulh, unspec_based_function, (UNSPEC_SQDMULH, -1, -1))
+FUNCTION (svqdmulh_lane, unspec_based_lane_function, (UNSPEC_SQDMULH, -1, -1))
+FUNCTION (svqdmullb, unspec_based_function, (UNSPEC_SQDMULLB, -1, -1))
+FUNCTION (svqdmullb_lane, unspec_based_lane_function, (UNSPEC_SQDMULLB,
+ -1, -1))
+FUNCTION (svqdmullt, unspec_based_function, (UNSPEC_SQDMULLT, -1, -1))
+FUNCTION (svqdmullt_lane, unspec_based_lane_function, (UNSPEC_SQDMULLT,
+ -1, -1))
+FUNCTION (svqneg, rtx_code_function, (SS_NEG, UNKNOWN, UNKNOWN))
+FUNCTION (svqrdcmlah, svqrdcmlah_impl,)
+FUNCTION (svqrdcmlah_lane, svqrdcmlah_lane_impl,)
+FUNCTION (svqrdmulh, unspec_based_function, (UNSPEC_SQRDMULH, -1, -1))
+FUNCTION (svqrdmulh_lane, unspec_based_lane_function, (UNSPEC_SQRDMULH,
+ -1, -1))
+FUNCTION (svqrdmlah, unspec_based_function, (UNSPEC_SQRDMLAH, -1, -1))
+FUNCTION (svqrdmlah_lane, unspec_based_lane_function, (UNSPEC_SQRDMLAH,
+ -1, -1))
+FUNCTION (svqrdmlsh, unspec_based_function, (UNSPEC_SQRDMLSH, -1, -1))
+FUNCTION (svqrdmlsh_lane, unspec_based_lane_function, (UNSPEC_SQRDMLSH,
+ -1, -1))
+FUNCTION (svqrshl, svqrshl_impl,)
+FUNCTION (svqrshrnb, unspec_based_function, (UNSPEC_SQRSHRNB,
+ UNSPEC_UQRSHRNB, -1))
+FUNCTION (svqrshrnt, unspec_based_function, (UNSPEC_SQRSHRNT,
+ UNSPEC_UQRSHRNT, -1))
+FUNCTION (svqrshrunb, unspec_based_function, (UNSPEC_SQRSHRUNB, -1, -1))
+FUNCTION (svqrshrunt, unspec_based_function, (UNSPEC_SQRSHRUNT, -1, -1))
+FUNCTION (svqshl, svqshl_impl,)
+FUNCTION (svqshlu, unspec_based_function, (UNSPEC_SQSHLU, -1, -1))
+FUNCTION (svqshrnb, unspec_based_function, (UNSPEC_SQSHRNB,
+ UNSPEC_UQSHRNB, -1))
+FUNCTION (svqshrnt, unspec_based_function, (UNSPEC_SQSHRNT,
+ UNSPEC_UQSHRNT, -1))
+FUNCTION (svqshrunb, unspec_based_function, (UNSPEC_SQSHRUNB, -1, -1))
+FUNCTION (svqshrunt, unspec_based_function, (UNSPEC_SQSHRUNT, -1, -1))
+FUNCTION (svqsubr, rtx_code_function_rotated, (SS_MINUS, US_MINUS, -1))
+FUNCTION (svqxtnb, unspec_based_function, (UNSPEC_SQXTNB, UNSPEC_UQXTNB, -1))
+FUNCTION (svqxtnt, unspec_based_function, (UNSPEC_SQXTNT, UNSPEC_UQXTNT, -1))
+FUNCTION (svqxtunb, unspec_based_function, (UNSPEC_SQXTUNB, -1, -1))
+FUNCTION (svqxtunt, unspec_based_function, (UNSPEC_SQXTUNT, -1, -1))
+FUNCTION (svraddhnb, unspec_based_function, (UNSPEC_RADDHNB,
+ UNSPEC_RADDHNB, -1))
+FUNCTION (svraddhnt, unspec_based_function, (UNSPEC_RADDHNT,
+ UNSPEC_RADDHNT, -1))
+FUNCTION (svrax1, fixed_insn_function, (CODE_FOR_aarch64_sve2_rax1))
+FUNCTION (svrhadd, unspec_based_function, (UNSPEC_SRHADD, UNSPEC_URHADD, -1))
+FUNCTION (svrshl, svrshl_impl,)
+FUNCTION (svrshr, unspec_based_function, (UNSPEC_SRSHR, UNSPEC_URSHR, -1))
+FUNCTION (svrshrnb, unspec_based_function, (UNSPEC_RSHRNB, UNSPEC_RSHRNB, -1))
+FUNCTION (svrshrnt, unspec_based_function, (UNSPEC_RSHRNT, UNSPEC_RSHRNT, -1))
+FUNCTION (svrsra, unspec_based_add_function, (UNSPEC_SRSHR, UNSPEC_URSHR, -1))
+FUNCTION (svrsubhnb, unspec_based_function, (UNSPEC_RSUBHNB,
+ UNSPEC_RSUBHNB, -1))
+FUNCTION (svrsubhnt, unspec_based_function, (UNSPEC_RSUBHNT,
+ UNSPEC_RSUBHNT, -1))
+FUNCTION (svsbclb, unspec_based_function, (-1, UNSPEC_SBCLB, -1))
+FUNCTION (svsbclt, unspec_based_function, (-1, UNSPEC_SBCLT, -1))
+FUNCTION (svshllb, unspec_based_function, (UNSPEC_SSHLLB, UNSPEC_USHLLB, -1))
+FUNCTION (svshllt, unspec_based_function, (UNSPEC_SSHLLT, UNSPEC_USHLLT, -1))
+FUNCTION (svshrnb, unspec_based_function, (UNSPEC_SHRNB, UNSPEC_SHRNB, -1))
+FUNCTION (svshrnt, unspec_based_function, (UNSPEC_SHRNT, UNSPEC_SHRNT, -1))
+FUNCTION (svsli, unspec_based_function, (UNSPEC_SLI, UNSPEC_SLI, -1))
+FUNCTION (svsm4e, fixed_insn_function, (CODE_FOR_aarch64_sve2_sm4e))
+FUNCTION (svsm4ekey, fixed_insn_function, (CODE_FOR_aarch64_sve2_sm4ekey))
+FUNCTION (svsqadd, svsqadd_impl,)
+FUNCTION (svsra, svsra_impl,)
+FUNCTION (svsri, unspec_based_function, (UNSPEC_SRI, UNSPEC_SRI, -1))
+FUNCTION (svstnt1_scatter, svstnt1_scatter_impl,)
+FUNCTION (svstnt1b_scatter, svstnt1_scatter_truncate_impl, (QImode))
+FUNCTION (svstnt1h_scatter, svstnt1_scatter_truncate_impl, (HImode))
+FUNCTION (svstnt1w_scatter, svstnt1_scatter_truncate_impl, (SImode))
+FUNCTION (svsubhnb, unspec_based_function, (UNSPEC_SUBHNB, UNSPEC_SUBHNB, -1))
+FUNCTION (svsubhnt, unspec_based_function, (UNSPEC_SUBHNT, UNSPEC_SUBHNT, -1))
+FUNCTION (svsublb, unspec_based_function, (UNSPEC_SSUBLB, UNSPEC_USUBLB, -1))
+FUNCTION (svsublbt, unspec_based_function, (UNSPEC_SSUBLBT, -1, -1))
+FUNCTION (svsublt, unspec_based_function, (UNSPEC_SSUBLT, UNSPEC_USUBLT, -1))
+FUNCTION (svsubltb, unspec_based_function, (UNSPEC_SSUBLTB, -1, -1))
+FUNCTION (svsubwb, unspec_based_function, (UNSPEC_SSUBWB, UNSPEC_USUBWB, -1))
+FUNCTION (svsubwt, unspec_based_function, (UNSPEC_SSUBWT, UNSPEC_USUBWT, -1))
+FUNCTION (svtbl2, svtbl2_impl,)
+FUNCTION (svtbx, CODE_FOR_MODE0 (aarch64_sve2_tbx),)
+FUNCTION (svuqadd, svuqadd_impl,)
+FUNCTION (svwhilege, while_comparison, (UNSPEC_WHILEGE, UNSPEC_WHILEHS))
+FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI))
+FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW))
+FUNCTION (svwhilewr, svwhilerw_svwhilewr_impl, (UNSPEC_WHILEWR))
+FUNCTION (svxar, CODE_FOR_MODE0 (aarch64_sve2_xar),)
+
+} /* end namespace aarch64_sve */
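A minimal sketch of ACLE calls that these function_base entries are meant to resolve; the intrinsic spellings follow the usual arm_sve.h conventions and are assumptions here rather than something this hunk defines:

#include <arm_sve.h>

/* Saturating rounding doubling multiply-add high: expected to go
   through the svqrdmlah entry above and expand to SQRDMLAH.  */
svint16_t
qrdmlah_example (svint16_t acc, svint16_t a, svint16_t b)
{
  return svqrdmlah_s16 (acc, a, b);
}

/* Bitwise exclusive OR of three vectors: expected to go through the
   sveor3 entry above and expand to EOR3.  */
svuint32_t
eor3_example (svuint32_t a, svuint32_t b, svuint32_t c)
{
  return sveor3_u32 (a, b, c);
}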
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
new file mode 100644
index 0000000..5ab41c3
--- /dev/null
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -0,0 +1,214 @@
+/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics)
+ Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE2
+DEF_SVE_FUNCTION (svaba, ternary_opt_n, all_integer, none)
+DEF_SVE_FUNCTION (svabalb, ternary_long_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svabalt, ternary_long_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svadalp, binary_wide, hsd_integer, mxz)
+DEF_SVE_FUNCTION (svadclb, ternary_opt_n, sd_unsigned, none)
+DEF_SVE_FUNCTION (svadclt, ternary_opt_n, sd_unsigned, none)
+DEF_SVE_FUNCTION (svaddhnb, binary_narrowb_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svaddhnt, binary_narrowt_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svabdlb, binary_long_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svabdlt, binary_long_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svaddlb, binary_long_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svaddlbt, binary_long_opt_n, hsd_signed, none)
+DEF_SVE_FUNCTION (svaddlt, binary_long_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svaddp, binary, all_data, mx)
+DEF_SVE_FUNCTION (svaddwb, binary_wide_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svaddwt, binary_wide_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svbcax, ternary_opt_n, all_integer, none)
+DEF_SVE_FUNCTION (svbsl, ternary_opt_n, all_integer, none)
+DEF_SVE_FUNCTION (svbsl1n, ternary_opt_n, all_integer, none)
+DEF_SVE_FUNCTION (svbsl2n, ternary_opt_n, all_integer, none)
+DEF_SVE_FUNCTION (svcadd, binary_rotate, all_integer, none)
+DEF_SVE_FUNCTION (svcdot, ternary_qq_rotate, sd_signed, none)
+DEF_SVE_FUNCTION (svcdot_lane, ternary_qq_lane_rotate, sd_signed, none)
+DEF_SVE_FUNCTION (svcmla, ternary_rotate, all_integer, none)
+DEF_SVE_FUNCTION (svcmla_lane, ternary_lane_rotate, hs_integer, none)
+DEF_SVE_FUNCTION (svcvtlt, unary_convert, cvt_long, mx)
+DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_narrow, mx)
+DEF_SVE_FUNCTION (svcvtx, unary_convert, cvt_narrow_s, mxz)
+DEF_SVE_FUNCTION (svcvtxnt, unary_convert_narrowt, cvt_narrow_s, mx)
+DEF_SVE_FUNCTION (sveor3, ternary_opt_n, all_integer, none)
+DEF_SVE_FUNCTION (sveorbt, ternary_opt_n, all_integer, none)
+DEF_SVE_FUNCTION (sveortb, ternary_opt_n, all_integer, none)
+DEF_SVE_FUNCTION (svhadd, binary_opt_n, all_integer, mxz)
+DEF_SVE_FUNCTION (svhistcnt, binary_to_uint, sd_integer, z)
+DEF_SVE_FUNCTION (svhistseg, binary_to_uint, b_integer, none)
+DEF_SVE_FUNCTION (svhsub, binary_opt_n, all_integer, mxz)
+DEF_SVE_FUNCTION (svhsubr, binary_opt_n, all_integer, mxz)
+DEF_SVE_FUNCTION (svldnt1_gather, load_gather_sv_restricted, sd_data, implicit)
+DEF_SVE_FUNCTION (svldnt1_gather, load_gather_vs, sd_data, implicit)
+DEF_SVE_FUNCTION (svldnt1sb_gather, load_ext_gather_offset_restricted, sd_integer, implicit)
+DEF_SVE_FUNCTION (svldnt1sh_gather, load_ext_gather_offset_restricted, sd_integer, implicit)
+DEF_SVE_FUNCTION (svldnt1sh_gather, load_ext_gather_index_restricted, sd_integer, implicit)
+DEF_SVE_FUNCTION (svldnt1sw_gather, load_ext_gather_offset_restricted, d_integer, implicit)
+DEF_SVE_FUNCTION (svldnt1sw_gather, load_ext_gather_index_restricted, d_integer, implicit)
+DEF_SVE_FUNCTION (svldnt1ub_gather, load_ext_gather_offset_restricted, sd_integer, implicit)
+DEF_SVE_FUNCTION (svldnt1uh_gather, load_ext_gather_offset_restricted, sd_integer, implicit)
+DEF_SVE_FUNCTION (svldnt1uh_gather, load_ext_gather_index_restricted, sd_integer, implicit)
+DEF_SVE_FUNCTION (svldnt1uw_gather, load_ext_gather_offset_restricted, d_integer, implicit)
+DEF_SVE_FUNCTION (svldnt1uw_gather, load_ext_gather_index_restricted, d_integer, implicit)
+DEF_SVE_FUNCTION (svlogb, unary_to_int, all_float, mxz)
+DEF_SVE_FUNCTION (svmatch, compare, bh_integer, implicit)
+DEF_SVE_FUNCTION (svmaxp, binary, all_data, mx)
+DEF_SVE_FUNCTION (svmaxnmp, binary, all_float, mx)
+DEF_SVE_FUNCTION (svmla_lane, ternary_lane, hsd_integer, none)
+DEF_SVE_FUNCTION (svmlalb, ternary_long_opt_n, s_float_hsd_integer, none)
+DEF_SVE_FUNCTION (svmlalb_lane, ternary_long_lane, s_float_sd_integer, none)
+DEF_SVE_FUNCTION (svmlalt, ternary_long_opt_n, s_float_hsd_integer, none)
+DEF_SVE_FUNCTION (svmlalt_lane, ternary_long_lane, s_float_sd_integer, none)
+DEF_SVE_FUNCTION (svmls_lane, ternary_lane, hsd_integer, none)
+DEF_SVE_FUNCTION (svmlslb, ternary_long_opt_n, s_float_hsd_integer, none)
+DEF_SVE_FUNCTION (svmlslb_lane, ternary_long_lane, s_float_sd_integer, none)
+DEF_SVE_FUNCTION (svmlslt, ternary_long_opt_n, s_float_hsd_integer, none)
+DEF_SVE_FUNCTION (svmlslt_lane, ternary_long_lane, s_float_sd_integer, none)
+DEF_SVE_FUNCTION (svminp, binary, all_data, mx)
+DEF_SVE_FUNCTION (svminnmp, binary, all_float, mx)
+DEF_SVE_FUNCTION (svmovlb, unary_long, hsd_integer, none)
+DEF_SVE_FUNCTION (svmovlt, unary_long, hsd_integer, none)
+DEF_SVE_FUNCTION (svmul_lane, binary_lane, hsd_integer, none)
+DEF_SVE_FUNCTION (svmullb, binary_long_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svmullb_lane, binary_long_lane, sd_integer, none)
+DEF_SVE_FUNCTION (svmullt, binary_long_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svmullt_lane, binary_long_lane, sd_integer, none)
+DEF_SVE_FUNCTION (svnbsl, ternary_opt_n, all_integer, none)
+DEF_SVE_FUNCTION (svnmatch, compare, bh_integer, implicit)
+DEF_SVE_FUNCTION (svpmul, binary_opt_n, b_unsigned, none)
+DEF_SVE_FUNCTION (svpmullb, binary_long_opt_n, hd_unsigned, none)
+DEF_SVE_FUNCTION (svpmullb_pair, binary_opt_n, bs_unsigned, none)
+DEF_SVE_FUNCTION (svpmullt, binary_long_opt_n, hd_unsigned, none)
+DEF_SVE_FUNCTION (svpmullt_pair, binary_opt_n, bs_unsigned, none)
+DEF_SVE_FUNCTION (svqabs, unary, all_signed, mxz)
+DEF_SVE_FUNCTION (svqadd, binary_opt_n, all_integer, mxz)
+DEF_SVE_FUNCTION (svqcadd, binary_rotate, all_signed, none)
+DEF_SVE_FUNCTION (svqdmlalb, ternary_long_opt_n, hsd_signed, none)
+DEF_SVE_FUNCTION (svqdmlalb_lane, ternary_long_lane, sd_signed, none)
+DEF_SVE_FUNCTION (svqdmlalbt, ternary_long_opt_n, hsd_signed, none)
+DEF_SVE_FUNCTION (svqdmlalt, ternary_long_opt_n, hsd_signed, none)
+DEF_SVE_FUNCTION (svqdmlalt_lane, ternary_long_lane, sd_signed, none)
+DEF_SVE_FUNCTION (svqdmlslb, ternary_long_opt_n, hsd_signed, none)
+DEF_SVE_FUNCTION (svqdmlslb_lane, ternary_long_lane, sd_signed, none)
+DEF_SVE_FUNCTION (svqdmlslbt, ternary_long_opt_n, hsd_signed, none)
+DEF_SVE_FUNCTION (svqdmlslt, ternary_long_opt_n, hsd_signed, none)
+DEF_SVE_FUNCTION (svqdmlslt_lane, ternary_long_lane, sd_signed, none)
+DEF_SVE_FUNCTION (svqdmulh, binary_opt_n, all_signed, none)
+DEF_SVE_FUNCTION (svqdmulh_lane, binary_lane, hsd_signed, none)
+DEF_SVE_FUNCTION (svqdmullb, binary_long_opt_n, hsd_signed, none)
+DEF_SVE_FUNCTION (svqdmullb_lane, binary_long_lane, sd_signed, none)
+DEF_SVE_FUNCTION (svqdmullt, binary_long_opt_n, hsd_signed, none)
+DEF_SVE_FUNCTION (svqdmullt_lane, binary_long_lane, sd_signed, none)
+DEF_SVE_FUNCTION (svqneg, unary, all_signed, mxz)
+DEF_SVE_FUNCTION (svqrdmulh, binary_opt_n, all_signed, none)
+DEF_SVE_FUNCTION (svqrdmulh_lane, binary_lane, hsd_signed, none)
+DEF_SVE_FUNCTION (svqrdmlah, ternary_opt_n, all_signed, none)
+DEF_SVE_FUNCTION (svqrdmlah_lane, ternary_lane, hsd_signed, none)
+DEF_SVE_FUNCTION (svqrdmlsh, ternary_opt_n, all_signed, none)
+DEF_SVE_FUNCTION (svqrdmlsh_lane, ternary_lane, hsd_signed, none)
+DEF_SVE_FUNCTION (svqrdcmlah, ternary_rotate, all_signed, none)
+DEF_SVE_FUNCTION (svqrdcmlah_lane, ternary_lane_rotate, hs_signed, none)
+DEF_SVE_FUNCTION (svqrshrnb, shift_right_imm_narrowb, hsd_integer, none)
+DEF_SVE_FUNCTION (svqrshrnt, shift_right_imm_narrowt, hsd_integer, none)
+DEF_SVE_FUNCTION (svqrshrunb, shift_right_imm_narrowb_to_uint, hsd_signed, none)
+DEF_SVE_FUNCTION (svqrshrunt, shift_right_imm_narrowt_to_uint, hsd_signed, none)
+DEF_SVE_FUNCTION (svqshl, binary_int_opt_n, all_integer, mxz)
+DEF_SVE_FUNCTION (svqshlu, shift_left_imm_to_uint, all_signed, mxz)
+DEF_SVE_FUNCTION (svqshrnb, shift_right_imm_narrowb, hsd_integer, none)
+DEF_SVE_FUNCTION (svqshrnt, shift_right_imm_narrowt, hsd_integer, none)
+DEF_SVE_FUNCTION (svqshrunb, shift_right_imm_narrowb_to_uint, hsd_signed, none)
+DEF_SVE_FUNCTION (svqshrunt, shift_right_imm_narrowt_to_uint, hsd_signed, none)
+DEF_SVE_FUNCTION (svqrshl, binary_int_opt_n, all_integer, mxz)
+DEF_SVE_FUNCTION (svqsub, binary_opt_n, all_integer, mxz)
+DEF_SVE_FUNCTION (svqsubr, binary_opt_n, all_integer, mxz)
+DEF_SVE_FUNCTION (svqxtnb, unary_narrowb, hsd_integer, none)
+DEF_SVE_FUNCTION (svqxtnt, unary_narrowt, hsd_integer, none)
+DEF_SVE_FUNCTION (svqxtunb, unary_narrowb_to_uint, hsd_signed, none)
+DEF_SVE_FUNCTION (svqxtunt, unary_narrowt_to_uint, hsd_signed, none)
+DEF_SVE_FUNCTION (svraddhnb, binary_narrowb_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svraddhnt, binary_narrowt_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svrecpe, unary, s_unsigned, mxz)
+DEF_SVE_FUNCTION (svrhadd, binary_opt_n, all_integer, mxz)
+DEF_SVE_FUNCTION (svrsqrte, unary, s_unsigned, mxz)
+DEF_SVE_FUNCTION (svrshl, binary_int_opt_n, all_integer, mxz)
+DEF_SVE_FUNCTION (svrshr, shift_right_imm, all_integer, mxz)
+DEF_SVE_FUNCTION (svrshrnb, shift_right_imm_narrowb, hsd_integer, none)
+DEF_SVE_FUNCTION (svrshrnt, shift_right_imm_narrowt, hsd_integer, none)
+DEF_SVE_FUNCTION (svrsra, ternary_shift_right_imm, all_integer, none)
+DEF_SVE_FUNCTION (svrsubhnb, binary_narrowb_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svrsubhnt, binary_narrowt_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svsbclb, ternary_opt_n, sd_unsigned, none)
+DEF_SVE_FUNCTION (svsbclt, ternary_opt_n, sd_unsigned, none)
+DEF_SVE_FUNCTION (svshllb, shift_left_imm_long, hsd_integer, none)
+DEF_SVE_FUNCTION (svshllt, shift_left_imm_long, hsd_integer, none)
+DEF_SVE_FUNCTION (svshrnb, shift_right_imm_narrowb, hsd_integer, none)
+DEF_SVE_FUNCTION (svshrnt, shift_right_imm_narrowt, hsd_integer, none)
+DEF_SVE_FUNCTION (svsli, ternary_shift_left_imm, all_integer, none)
+DEF_SVE_FUNCTION (svsqadd, binary_int_opt_n, all_unsigned, mxz)
+DEF_SVE_FUNCTION (svsra, ternary_shift_right_imm, all_integer, none)
+DEF_SVE_FUNCTION (svsri, ternary_shift_right_imm, all_integer, none)
+DEF_SVE_FUNCTION (svstnt1_scatter, store_scatter_index_restricted, sd_data, implicit)
+DEF_SVE_FUNCTION (svstnt1_scatter, store_scatter_offset_restricted, sd_data, implicit)
+DEF_SVE_FUNCTION (svstnt1b_scatter, store_scatter_offset_restricted, sd_integer, implicit)
+DEF_SVE_FUNCTION (svstnt1h_scatter, store_scatter_index_restricted, sd_integer, implicit)
+DEF_SVE_FUNCTION (svstnt1h_scatter, store_scatter_offset_restricted, sd_integer, implicit)
+DEF_SVE_FUNCTION (svstnt1w_scatter, store_scatter_index_restricted, d_integer, implicit)
+DEF_SVE_FUNCTION (svstnt1w_scatter, store_scatter_offset_restricted, d_integer, implicit)
+DEF_SVE_FUNCTION (svsubhnb, binary_narrowb_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svsubhnt, binary_narrowt_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svsublb, binary_long_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svsublbt, binary_long_opt_n, hsd_signed, none)
+DEF_SVE_FUNCTION (svsublt, binary_long_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svsubltb, binary_long_opt_n, hsd_signed, none)
+DEF_SVE_FUNCTION (svsubwb, binary_wide_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svsubwt, binary_wide_opt_n, hsd_integer, none)
+DEF_SVE_FUNCTION (svtbl2, tbl_tuple, all_data, none)
+DEF_SVE_FUNCTION (svtbx, ternary_uint, all_data, none)
+DEF_SVE_FUNCTION (svuqadd, binary_uint_opt_n, all_signed, mxz)
+DEF_SVE_FUNCTION (svwhilege, compare_scalar, while, none)
+DEF_SVE_FUNCTION (svwhilegt, compare_scalar, while, none)
+DEF_SVE_FUNCTION (svwhilerw, compare_ptr, all_data, none)
+DEF_SVE_FUNCTION (svwhilewr, compare_ptr, all_data, none)
+DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES)
+DEF_SVE_FUNCTION (svaesd, binary, b_unsigned, none)
+DEF_SVE_FUNCTION (svaese, binary, b_unsigned, none)
+DEF_SVE_FUNCTION (svaesmc, unary, b_unsigned, none)
+DEF_SVE_FUNCTION (svaesimc, unary, b_unsigned, none)
+DEF_SVE_FUNCTION (svpmullb_pair, binary_opt_n, d_unsigned, none)
+DEF_SVE_FUNCTION (svpmullt_pair, binary_opt_n, d_unsigned, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM)
+DEF_SVE_FUNCTION (svbdep, binary_opt_n, all_unsigned, none)
+DEF_SVE_FUNCTION (svbext, binary_opt_n, all_unsigned, none)
+DEF_SVE_FUNCTION (svbgrp, binary_opt_n, all_unsigned, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SHA3)
+DEF_SVE_FUNCTION (svrax1, binary, d_integer, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SM4)
+DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none)
+DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none)
+#undef REQUIRED_EXTENSIONS
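A minimal sketch of how the optional groups above surface to users, assuming the conventional __ARM_FEATURE_SVE2_AES feature macro (the macro itself is defined elsewhere, not in this file):

#include <arm_sve.h>

svuint8_t
aes_round (svuint8_t state, svuint8_t round_key)
{
#ifdef __ARM_FEATURE_SVE2_AES
  /* Single AES encryption round, available only with the AES extension.  */
  return svaese_u8 (state, round_key);
#else
  /* Fallback when the AES extension is absent (not a real AES round).  */
  return sveor_u8_x (svptrue_b8 (), state, round_key);
#endif
}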
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
new file mode 100644
index 0000000..90e29fc
--- /dev/null
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
@@ -0,0 +1,191 @@
+/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics)
+ Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_AARCH64_SVE_BUILTINS_SVE2_H
+#define GCC_AARCH64_SVE_BUILTINS_SVE2_H
+
+namespace aarch64_sve
+{
+ namespace functions
+ {
+ extern const function_base *const svaba;
+ extern const function_base *const svabalb;
+ extern const function_base *const svabalt;
+ extern const function_base *const svabdlb;
+ extern const function_base *const svabdlt;
+ extern const function_base *const svadalp;
+ extern const function_base *const svadclb;
+ extern const function_base *const svadclt;
+ extern const function_base *const svaddhnb;
+ extern const function_base *const svaddhnt;
+ extern const function_base *const svaddlb;
+ extern const function_base *const svaddlbt;
+ extern const function_base *const svaddlt;
+ extern const function_base *const svaddp;
+ extern const function_base *const svaddwb;
+ extern const function_base *const svaddwt;
+ extern const function_base *const svaesd;
+ extern const function_base *const svaese;
+ extern const function_base *const svaesimc;
+ extern const function_base *const svaesmc;
+ extern const function_base *const svbcax;
+ extern const function_base *const svbdep;
+ extern const function_base *const svbext;
+ extern const function_base *const svbgrp;
+ extern const function_base *const svbsl;
+ extern const function_base *const svbsl1n;
+ extern const function_base *const svbsl2n;
+ extern const function_base *const svcdot;
+ extern const function_base *const svcdot_lane;
+ extern const function_base *const svcvtlt;
+ extern const function_base *const svcvtnt;
+ extern const function_base *const svcvtx;
+ extern const function_base *const svcvtxnt;
+ extern const function_base *const sveor3;
+ extern const function_base *const sveorbt;
+ extern const function_base *const sveortb;
+ extern const function_base *const svhadd;
+ extern const function_base *const svhistcnt;
+ extern const function_base *const svhistseg;
+ extern const function_base *const svhsub;
+ extern const function_base *const svhsubr;
+ extern const function_base *const svldnt1_gather;
+ extern const function_base *const svldnt1sb_gather;
+ extern const function_base *const svldnt1sh_gather;
+ extern const function_base *const svldnt1sw_gather;
+ extern const function_base *const svldnt1ub_gather;
+ extern const function_base *const svldnt1uh_gather;
+ extern const function_base *const svldnt1uw_gather;
+ extern const function_base *const svlogb;
+ extern const function_base *const svmatch;
+ extern const function_base *const svmaxp;
+ extern const function_base *const svmaxnmp;
+ extern const function_base *const svmlalb;
+ extern const function_base *const svmlalb_lane;
+ extern const function_base *const svmlalt;
+ extern const function_base *const svmlalt_lane;
+ extern const function_base *const svmlslb;
+ extern const function_base *const svmlslb_lane;
+ extern const function_base *const svmlslt;
+ extern const function_base *const svmlslt_lane;
+ extern const function_base *const svminp;
+ extern const function_base *const svminnmp;
+ extern const function_base *const svmovlb;
+ extern const function_base *const svmovlt;
+ extern const function_base *const svmullb;
+ extern const function_base *const svmullb_lane;
+ extern const function_base *const svmullt;
+ extern const function_base *const svmullt_lane;
+ extern const function_base *const svnbsl;
+ extern const function_base *const svnmatch;
+ extern const function_base *const svpmul;
+ extern const function_base *const svpmullb;
+ extern const function_base *const svpmullb_pair;
+ extern const function_base *const svpmullt;
+ extern const function_base *const svpmullt_pair;
+ extern const function_base *const svqabs;
+ extern const function_base *const svqcadd;
+ extern const function_base *const svqdmlalb;
+ extern const function_base *const svqdmlalb_lane;
+ extern const function_base *const svqdmlalbt;
+ extern const function_base *const svqdmlalt;
+ extern const function_base *const svqdmlalt_lane;
+ extern const function_base *const svqdmlslb;
+ extern const function_base *const svqdmlslb_lane;
+ extern const function_base *const svqdmlslbt;
+ extern const function_base *const svqdmlslt;
+ extern const function_base *const svqdmlslt_lane;
+ extern const function_base *const svqdmulh;
+ extern const function_base *const svqdmulh_lane;
+ extern const function_base *const svqdmullb;
+ extern const function_base *const svqdmullb_lane;
+ extern const function_base *const svqdmullt;
+ extern const function_base *const svqdmullt_lane;
+ extern const function_base *const svqneg;
+ extern const function_base *const svqrdcmlah;
+ extern const function_base *const svqrdcmlah_lane;
+ extern const function_base *const svqrdmulh;
+ extern const function_base *const svqrdmulh_lane;
+ extern const function_base *const svqrdmlah;
+ extern const function_base *const svqrdmlah_lane;
+ extern const function_base *const svqrdmlsh;
+ extern const function_base *const svqrdmlsh_lane;
+ extern const function_base *const svqrshl;
+ extern const function_base *const svqrshrnb;
+ extern const function_base *const svqrshrnt;
+ extern const function_base *const svqrshrunb;
+ extern const function_base *const svqrshrunt;
+ extern const function_base *const svqshl;
+ extern const function_base *const svqshlu;
+ extern const function_base *const svqshrnb;
+ extern const function_base *const svqshrnt;
+ extern const function_base *const svqshrunb;
+ extern const function_base *const svqshrunt;
+ extern const function_base *const svqsubr;
+ extern const function_base *const svqxtnb;
+ extern const function_base *const svqxtnt;
+ extern const function_base *const svqxtunb;
+ extern const function_base *const svqxtunt;
+ extern const function_base *const svraddhnb;
+ extern const function_base *const svraddhnt;
+ extern const function_base *const svrax1;
+ extern const function_base *const svrhadd;
+ extern const function_base *const svrshl;
+ extern const function_base *const svrshr;
+ extern const function_base *const svrshrnb;
+ extern const function_base *const svrshrnt;
+ extern const function_base *const svrsra;
+ extern const function_base *const svrsubhnb;
+ extern const function_base *const svrsubhnt;
+ extern const function_base *const svsbclb;
+ extern const function_base *const svsbclt;
+ extern const function_base *const svshllb;
+ extern const function_base *const svshllt;
+ extern const function_base *const svshrnb;
+ extern const function_base *const svshrnt;
+ extern const function_base *const svsli;
+ extern const function_base *const svsm4e;
+ extern const function_base *const svsm4ekey;
+ extern const function_base *const svsqadd;
+ extern const function_base *const svsra;
+ extern const function_base *const svsri;
+ extern const function_base *const svstnt1_scatter;
+ extern const function_base *const svstnt1b_scatter;
+ extern const function_base *const svstnt1h_scatter;
+ extern const function_base *const svstnt1w_scatter;
+ extern const function_base *const svsubhnb;
+ extern const function_base *const svsubhnt;
+ extern const function_base *const svsublb;
+ extern const function_base *const svsublbt;
+ extern const function_base *const svsublt;
+ extern const function_base *const svsubltb;
+ extern const function_base *const svsubwb;
+ extern const function_base *const svsubwt;
+ extern const function_base *const svtbl2;
+ extern const function_base *const svtbx;
+ extern const function_base *const svuqadd;
+ extern const function_base *const svwhilege;
+ extern const function_base *const svwhilegt;
+ extern const function_base *const svwhilerw;
+ extern const function_base *const svwhilewr;
+ extern const function_base *const svxar;
+ }
+}
+
+#endif
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index b09067c..7aab5bd 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -50,6 +50,7 @@
#include "attribs.h"
#include "aarch64-sve-builtins.h"
#include "aarch64-sve-builtins-base.h"
+#include "aarch64-sve-builtins-sve2.h"
#include "aarch64-sve-builtins-shapes.h"
namespace aarch64_sve {
@@ -190,6 +191,24 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
#define TYPES_b(S, D) \
S (b)
+/* _u8. */
+#define TYPES_b_unsigned(S, D) \
+ S (u8)
+
+/* _s8
+ _u8. */
+#define TYPES_b_integer(S, D) \
+ S (s8), TYPES_b_unsigned (S, D)
+
+/* _s8 _s16
+ _u8 _u16. */
+#define TYPES_bh_integer(S, D) \
+ S (s8), S (s16), S (u8), S (u16)
+
+/* _u8 _u32. */
+#define TYPES_bs_unsigned(S, D) \
+ S (u8), S (u32)
+
/* _s8 _s16 _s32. */
#define TYPES_bhs_signed(S, D) \
S (s8), S (s16), S (s32)
@@ -208,23 +227,64 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
#define TYPES_h_integer(S, D) \
S (s16), S (u16)
+/* _s16 _s32. */
+#define TYPES_hs_signed(S, D) \
+ S (s16), S (s32)
+
+/* _s16 _s32
+ _u16 _u32. */
+#define TYPES_hs_integer(S, D) \
+ TYPES_hs_signed (S, D), S (u16), S (u32)
+
/* _f16 _f32. */
#define TYPES_hs_float(S, D) \
S (f16), S (f32)
+/* _u16 _u64. */
+#define TYPES_hd_unsigned(S, D) \
+ S (u16), S (u64)
+
+/* _s16 _s32 _s64. */
+#define TYPES_hsd_signed(S, D) \
+ S (s16), S (s32), S (s64)
+
/* _s16 _s32 _s64
_u16 _u32 _u64. */
#define TYPES_hsd_integer(S, D) \
- S (s16), S (s32), S (s64), S (u16), S (u32), S (u64)
+ TYPES_hsd_signed (S, D), S (u16), S (u32), S (u64)
+
+/* _f32
+ _s16 _s32 _s64
+ _u16 _u32 _u64. */
+#define TYPES_s_float_hsd_integer(S, D) \
+ S (f32), TYPES_hsd_integer (S, D)
+
+/* _f32
+ _s32 _s64
+ _u32 _u64. */
+#define TYPES_s_float_sd_integer(S, D) \
+ S (f32), TYPES_sd_integer (S, D)
+
+/* _u32. */
+#define TYPES_s_unsigned(S, D) \
+ S (u32)
/* _s32 _u32. */
#define TYPES_s_integer(S, D) \
- S (s32), S (u32)
+ S (s32), TYPES_s_unsigned (S, D)
+
+/* _s32 _s64. */
+#define TYPES_sd_signed(S, D) \
+ S (s32), S (s64)
+
+/* _u32 _u64. */
+#define TYPES_sd_unsigned(S, D) \
+ S (u32), S (u64)
/* _s32 _s64
_u32 _u64. */
#define TYPES_sd_integer(S, D) \
- S (s32), S (s64), S (u32), S (u64)
+ TYPES_sd_signed (S, D), TYPES_sd_unsigned (S, D)
/* _f32 _f64
_s32 _s64
@@ -238,10 +298,20 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
#define TYPES_all_float_and_sd_integer(S, D) \
TYPES_all_float (S, D), TYPES_sd_integer (S, D)
+/* _u64. */
+#define TYPES_d_unsigned(S, D) \
+ S (u64)
+
/* _s64
_u64. */
#define TYPES_d_integer(S, D) \
- S (s64), S (u64)
+ S (s64), TYPES_d_unsigned (S, D)
+
+/* _f64
+ _s64
+ _u64. */
+#define TYPES_d_data(S, D) \
+ S (f64), TYPES_d_integer (S, D)
/* All the type combinations allowed by svcvt. */
#define TYPES_cvt(S, D) \
@@ -265,6 +335,20 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
D (u32, f16), D (u32, f32), D (u32, f64), \
D (u64, f16), D (u64, f32), D (u64, f64)
+/* _f32_f16
+ _f64_f32. */
+#define TYPES_cvt_long(S, D) \
+ D (f32, f16), D (f64, f32)
+
+/* _f32_f64. */
+#define TYPES_cvt_narrow_s(S, D) \
+ D (f32, f64)
+
+/* _f16_f32
+ _f32_f64. */
+#define TYPES_cvt_narrow(S, D) \
+ D (f16, f32), TYPES_cvt_narrow_s (S, D)
+
/* { _s32 _s64 } x { _b8 _b16 _b32 _b64 }
{ _u32 _u64 }. */
#define TYPES_inc_dec_n1(D, A) \
@@ -334,18 +418,36 @@ DEF_SVE_TYPES_ARRAY (all_unsigned);
DEF_SVE_TYPES_ARRAY (all_integer);
DEF_SVE_TYPES_ARRAY (all_data);
DEF_SVE_TYPES_ARRAY (b);
+DEF_SVE_TYPES_ARRAY (b_unsigned);
+DEF_SVE_TYPES_ARRAY (b_integer);
+DEF_SVE_TYPES_ARRAY (bh_integer);
+DEF_SVE_TYPES_ARRAY (bs_unsigned);
DEF_SVE_TYPES_ARRAY (bhs_signed);
DEF_SVE_TYPES_ARRAY (bhs_unsigned);
DEF_SVE_TYPES_ARRAY (bhs_integer);
DEF_SVE_TYPES_ARRAY (h_integer);
+DEF_SVE_TYPES_ARRAY (hs_signed);
+DEF_SVE_TYPES_ARRAY (hs_integer);
DEF_SVE_TYPES_ARRAY (hs_float);
+DEF_SVE_TYPES_ARRAY (hd_unsigned);
+DEF_SVE_TYPES_ARRAY (hsd_signed);
DEF_SVE_TYPES_ARRAY (hsd_integer);
+DEF_SVE_TYPES_ARRAY (s_float_hsd_integer);
+DEF_SVE_TYPES_ARRAY (s_float_sd_integer);
+DEF_SVE_TYPES_ARRAY (s_unsigned);
DEF_SVE_TYPES_ARRAY (s_integer);
+DEF_SVE_TYPES_ARRAY (sd_signed);
+DEF_SVE_TYPES_ARRAY (sd_unsigned);
DEF_SVE_TYPES_ARRAY (sd_integer);
DEF_SVE_TYPES_ARRAY (sd_data);
DEF_SVE_TYPES_ARRAY (all_float_and_sd_integer);
+DEF_SVE_TYPES_ARRAY (d_unsigned);
DEF_SVE_TYPES_ARRAY (d_integer);
+DEF_SVE_TYPES_ARRAY (d_data);
DEF_SVE_TYPES_ARRAY (cvt);
+DEF_SVE_TYPES_ARRAY (cvt_long);
+DEF_SVE_TYPES_ARRAY (cvt_narrow_s);
+DEF_SVE_TYPES_ARRAY (cvt_narrow);
DEF_SVE_TYPES_ARRAY (inc_dec_n);
DEF_SVE_TYPES_ARRAY (reinterpret);
DEF_SVE_TYPES_ARRAY (while);
@@ -357,6 +459,12 @@ static const predication_index preds_none[] = { PRED_none, NUM_PREDS };
explicit suffix. */
static const predication_index preds_implicit[] = { PRED_implicit, NUM_PREDS };
+/* Used by functions that allow merging and "don't care" predication,
+ but are not suitable for predicated MOVPRFX. */
+static const predication_index preds_mx[] = {
+ PRED_m, PRED_x, NUM_PREDS
+};
+
/* Used by functions that allow merging, zeroing and "don't care"
predication. */
static const predication_index preds_mxz[] = {
@@ -854,8 +962,11 @@ function_builder::add_unique_function (const function_instance &instance,
resolution. REQUIRED_EXTENSIONS are the set of architecture extensions
that the function requires.
- For simplicity, deal with duplicate attempts to add the same
- function. */
+ For simplicity, deal with duplicate attempts to add the same function,
+ including cases in which the new function requires more features than
+ the original one did. In that case we'll check whether the required
+ features are available as part of resolving the function to the
+ relevant unique function. */
void
function_builder::add_overloaded_function (const function_instance &instance,
uint64_t required_extensions)
@@ -863,7 +974,8 @@ function_builder::add_overloaded_function (const function_instance &instance,
char *name = get_name (instance, true);
if (registered_function **map_value = m_overload_names.get (name))
gcc_assert ((*map_value)->instance == instance
- && (*map_value)->required_extensions == required_extensions);
+ && ((*map_value)->required_extensions
+ & ~required_extensions) == 0);
else
{
registered_function &rfn
@@ -1079,7 +1191,7 @@ function_resolver::infer_pointer_type (unsigned int argno,
{
error_at (location, "passing %qT to argument %d of %qE, but %qT is not"
" a valid SVE element type", actual, argno + 1, fndecl,
- target);
+ build_qualified_type (target, 0));
return NUM_TYPE_SUFFIXES;
}
unsigned int bits = type_suffixes[type].element_bits;
@@ -1447,6 +1559,28 @@ require_derived_vector_type (unsigned int argno,
return false;
}
+/* Require argument ARGNO to match argument FIRST_ARGNO, which was inferred
+ to be a pointer to a scalar element of type TYPE. */
+bool
+function_resolver::require_matching_pointer_type (unsigned int argno,
+ unsigned int first_argno,
+ type_suffix_index type)
+{
+ type_suffix_index new_type = infer_pointer_type (argno);
+ if (new_type == NUM_TYPE_SUFFIXES)
+ return false;
+
+ if (type != new_type)
+ {
+ error_at (location, "passing %qT to argument %d of %qE, but"
+ " argument %d had type %qT", get_argument_type (argno),
+ argno + 1, fndecl, first_argno + 1,
+ get_argument_type (first_argno));
+ return false;
+ }
+ return true;
+}
+
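A minimal sketch of the mismatch this helper diagnoses, using svwhilerw as a plausible consumer (both pointer arguments of the overloaded form must have the same element type):

#include <arm_sve.h>
#include <stdint.h>

svbool_t
ok (const int32_t *start, const int32_t *end)
{
  return svwhilerw (start, end);   /* resolves to svwhilerw_s32  */
}

svbool_t
bad (const int32_t *start, const int16_t *end)
{
  /* Expected to trigger the "argument N had type ..." error above,
     since the second pointer's element type differs from the first.  */
  return svwhilerw (start, end);
}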
/* Require argument ARGNO to be a (possibly variable) scalar, using EXPECTED
as the name of its expected type. Return true if the argument has the
right form, otherwise report an appropriate error. */
@@ -1641,6 +1775,31 @@ function_resolver::resolve_sv_displacement (unsigned int argno,
return mode;
}
+ unsigned int required_bits = type_suffixes[type].element_bits;
+ if (required_bits == 32
+ && displacement_units () == UNITS_elements
+ && !lookup_form (MODE_s32index, type)
+ && !lookup_form (MODE_u32index, type))
+ {
+ if (lookup_form (MODE_u32base_index, type))
+ {
+ if (type_suffix_ids[0] == NUM_TYPE_SUFFIXES)
+ {
+ gcc_assert (!load_p);
+ error_at (location, "when storing %qT, %qE requires a vector"
+ " base and a scalar index", get_vector_type (type),
+ fndecl);
+ }
+ else
+ error_at (location, "%qE requires a vector base and a scalar"
+ " index", fndecl);
+ }
+ else
+ error_at (location, "%qE does not support 32-bit vector type %qT",
+ fndecl, get_vector_type (type));
+ return MODE_none;
+ }
+
/* Check for some form of vector type, without naming any in particular
as being expected. */
type_suffix_index displacement_type = infer_vector_type (argno);
@@ -1650,7 +1809,6 @@ function_resolver::resolve_sv_displacement (unsigned int argno,
/* If the displacement type is consistent with the data vector type,
try to find the associated mode suffix. This will fall through
for non-integral displacement types. */
- unsigned int required_bits = type_suffixes[type].element_bits;
if (type_suffixes[displacement_type].element_bits == required_bits)
{
vector_type_index displacement_vector_type
@@ -1659,7 +1817,21 @@ function_resolver::resolve_sv_displacement (unsigned int argno,
displacement_vector_type,
displacement_units ());
if (mode != MODE_none)
- return mode;
+ {
+ if (mode == MODE_s32offset
+ && !lookup_form (mode, type)
+ && lookup_form (MODE_u32offset, type))
+ {
+ if (type_suffix_ids[0] == NUM_TYPE_SUFFIXES)
+ error_at (location, "%qE does not support 32-bit sign-extended"
+ " offsets", fndecl);
+ else
+ error_at (location, "%qE does not support sign-extended"
+ " offsets", fndecl);
+ return MODE_none;
+ }
+ return mode;
+ }
}
if (type_suffix_ids[0] == NUM_TYPE_SUFFIXES)
@@ -1873,11 +2045,12 @@ function_resolver::check_gp_argument (unsigned int nops,
in the latter case. This "_n" form might only exist for certain
type suffixes.
- ARGNO is the index of the final argument. The inferred type
- suffix is FIRST_TYPE, which was obtained from argument FIRST_ARGNO.
+ ARGNO is the index of the final argument. The inferred type suffix
+ was obtained from argument FIRST_ARGNO, which has type FIRST_TYPE.
EXPECTED_TCLASS and EXPECTED_BITS describe the expected properties
of the final vector or scalar argument, in the same way as for
- require_derived_vector_type.
+ require_derived_vector_type. INFERRED_TYPE is the inferred type
+ suffix itself, or NUM_TYPE_SUFFIXES if it's the same as FIRST_TYPE.
Return the function decl of the resolved function on success,
otherwise report a suitable error and return error_mark_node. */
@@ -1885,9 +2058,12 @@ tree function_resolver::
finish_opt_n_resolution (unsigned int argno, unsigned int first_argno,
type_suffix_index first_type,
type_class_index expected_tclass,
- unsigned int expected_bits)
+ unsigned int expected_bits,
+ type_suffix_index inferred_type)
{
- tree scalar_form = lookup_form (MODE_n, first_type);
+ if (inferred_type == NUM_TYPE_SUFFIXES)
+ inferred_type = first_type;
+ tree scalar_form = lookup_form (MODE_n, inferred_type);
/* Allow the final argument to be scalar, if an _n form exists. */
if (scalar_argument_p (argno))
@@ -1897,7 +2073,7 @@ finish_opt_n_resolution (unsigned int argno, unsigned int first_argno,
/* Check the vector form normally. If that succeeds, raise an
error about having no corresponding _n form. */
- tree res = resolve_to (mode_suffix_id, first_type);
+ tree res = resolve_to (mode_suffix_id, inferred_type);
if (res != error_mark_node)
error_at (location, "passing %qT to argument %d of %qE, but its"
" %qT form does not accept scalars",
@@ -1917,13 +2093,14 @@ finish_opt_n_resolution (unsigned int argno, unsigned int first_argno,
expected_tclass, expected_bits))
return error_mark_node;
- return resolve_to (mode_suffix_id, first_type);
+ return resolve_to (mode_suffix_id, inferred_type);
}
/* Resolve a (possibly predicated) unary function. If the function uses
- merge predication, there is an extra vector argument before the
- governing predicate that specifies the values of inactive elements.
- This argument has the following properties:
+ merge predication or if TREAT_AS_MERGE_P is true, there is an extra
+ vector argument before the governing predicate that specifies the
+ values of inactive elements. This argument has the following
+ properties:
- the type class must be the same as for active elements if MERGE_TCLASS
is SAME_TYPE_CLASS, otherwise it must be MERGE_TCLASS itself.
@@ -1935,10 +2112,11 @@ finish_opt_n_resolution (unsigned int argno, unsigned int first_argno,
otherwise report a suitable error and return error_mark_node. */
tree
function_resolver::resolve_unary (type_class_index merge_tclass,
- unsigned int merge_bits)
+ unsigned int merge_bits,
+ bool treat_as_merge_p)
{
type_suffix_index type;
- if (pred == PRED_m)
+ if (pred == PRED_m || treat_as_merge_p)
{
if (!check_num_arguments (3))
return error_mark_node;
@@ -2302,6 +2480,19 @@ gimple_folder::load_store_cookie (tree type)
return build_int_cst (build_pointer_type (type), TYPE_ALIGN_UNIT (type));
}
+/* Fold the call into a call to INSTANCE, with the same arguments. */
+gimple *
+gimple_folder::redirect_call (const function_instance &instance)
+{
+ registered_function *rfn
+ = function_table->find_with_hash (instance, instance.hash ());
+ if (!rfn)
+ return NULL;
+
+ gimple_call_set_fndecl (call, rfn->decl);
+ return call;
+}
+
/* Fold the call to a PTRUE, taking the element size from type suffix 0. */
gimple *
gimple_folder::fold_to_ptrue ()
@@ -2584,14 +2775,22 @@ function_expander::generate_insn (insn_code icode)
- a scalar base
- a vector displacement
+
+ If SCALED_P is true, it also expects:
+
- a const_int that is 1 if the displacement is zero-extended from 32 bits
- - a scaling multiplier (1 for bytes, 2 for .h indices, etc.). */
+ - a scaling multiplier (1 for bytes, 2 for .h indices, etc.).
+
+ If SCALED_P is false, the displacement is implicitly zero-extended
+ and the scaling multiplier is implicitly 1. */
void
-function_expander::prepare_gather_address_operands (unsigned int argno)
+function_expander::prepare_gather_address_operands (unsigned int argno,
+ bool scaled_p)
{
machine_mode mem_mode = memory_vector_mode ();
tree vector_type = base_vector_type ();
units_index units = displacement_units ();
+ int shift_idx = -1;
if (units == UNITS_none)
{
/* Vector base, no displacement. Convert to an integer zero base
@@ -2605,31 +2804,45 @@ function_expander::prepare_gather_address_operands (unsigned int argno)
a vector byte offset. */
std::swap (args[argno], args[argno + 1]);
if (units == UNITS_elements)
- {
- /* Convert the original scalar array index to a byte offset. */
- rtx size = gen_int_mode (GET_MODE_UNIT_SIZE (mem_mode), DImode);
- args[argno] = simplify_gen_binary (MULT, DImode, args[argno], size);
- units = UNITS_bytes;
- }
+ shift_idx = argno;
}
else
{
- /* Scalar base, vector displacement. This is what the md pattern wants,
- so we just need to make sure that the scalar base has DImode. */
+ /* Scalar base, vector displacement. This is the order that the md
+ pattern wants. */
if (Pmode == SImode)
args[argno] = simplify_gen_unary (ZERO_EXTEND, DImode,
args[argno], SImode);
vector_type = displacement_vector_type ();
+ if (units == UNITS_elements && !scaled_p)
+ shift_idx = argno + 1;
}
tree scalar_displacement_type = TREE_TYPE (vector_type);
- bool uxtw_p = (TYPE_PRECISION (scalar_displacement_type) < 64
- && TYPE_UNSIGNED (scalar_displacement_type));
+ if (shift_idx >= 0)
+ {
+ machine_mode arg_mode = GET_MODE (args[shift_idx]);
+ if (arg_mode == VOIDmode)
+ arg_mode = DImode;
+ unsigned int elt_bytes = GET_MODE_UNIT_SIZE (mem_mode);
+ rtx shift = gen_int_mode (exact_log2 (elt_bytes), DImode);
+ args[shift_idx] = simplify_gen_binary (ASHIFT, arg_mode,
+ args[shift_idx], shift);
+ units = UNITS_bytes;
+ }
+
+ bool uxtw_p = (TYPE_PRECISION (scalar_displacement_type) == 64
+ || TYPE_UNSIGNED (scalar_displacement_type));
unsigned int scale = (units == UNITS_bytes
? 1 : GET_MODE_UNIT_SIZE (mem_mode));
- args.quick_insert (argno + 2, GEN_INT (uxtw_p));
- args.quick_insert (argno + 3, GEN_INT (scale));
+ if (scaled_p)
+ {
+ args.quick_insert (argno + 2, GEN_INT (uxtw_p));
+ args.quick_insert (argno + 3, GEN_INT (scale));
+ }
+ else
+ gcc_assert (uxtw_p && scale == 1);
}
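A hypothetical sketch of how an SVE2 expander might use the new SCALED_P argument; the helper name and control flow are assumptions, but the calls match the interfaces declared in this patch:

/* Non-temporal gathers take an unscaled byte offset, so skip the
   extension and scale operands that the scaled (default) form adds.  */
rtx
expand_ldnt1_gather_sketch (function_expander &e)
{
  e.prepare_gather_address_operands (1, /*scaled_p=*/false);
  return e.use_exact_insn (code_for_aarch64_gather_ldnt (e.vector_mode (0)));
}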
/* The final argument is an immediate svprfop value. Add two fake arguments
@@ -2969,7 +3182,11 @@ function_expander::map_to_unspecs (int unspec_for_sint, int unspec_for_uint,
}
if (pred == PRED_none || pred == PRED_x)
- return use_unpred_insn (code_for_aarch64_sve (unspec, mode));
+ {
+ insn_code icode = maybe_code_for_aarch64_sve (unspec, mode);
+ if (icode != CODE_FOR_nothing)
+ return use_unpred_insn (icode);
+ }
insn_code icode = code_for_cond (unspec, vector_mode (0));
return use_cond_insn (icode, merge_argno);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def
index 9731a22..040f1d8 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins.def
@@ -91,6 +91,7 @@ DEF_SVE_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode)
DEF_SVE_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, VNx2DImode)
#include "aarch64-sve-builtins-base.def"
+#include "aarch64-sve-builtins-sve2.def"
#undef DEF_SVE_FUNCTION
#undef DEF_SVE_TYPE_SUFFIX
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 7d07c10..f307233 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -416,6 +416,8 @@ public:
type_suffix_index);
bool require_derived_scalar_type (unsigned int, type_class_index,
unsigned int = SAME_SIZE);
+ bool require_matching_pointer_type (unsigned int, unsigned int,
+ type_suffix_index);
bool require_integer_immediate (unsigned int);
vector_type_index infer_vector_base_type (unsigned int);
@@ -430,12 +432,13 @@ public:
bool check_num_arguments (unsigned int);
bool check_gp_argument (unsigned int, unsigned int &, unsigned int &);
tree resolve_unary (type_class_index = SAME_TYPE_CLASS,
- unsigned int = SAME_SIZE);
+ unsigned int = SAME_SIZE, bool = false);
tree resolve_uniform (unsigned int, unsigned int = 0);
tree resolve_uniform_opt_n (unsigned int);
tree finish_opt_n_resolution (unsigned int, unsigned int, type_suffix_index,
type_class_index = SAME_TYPE_CLASS,
- unsigned int = SAME_SIZE);
+ unsigned int = SAME_SIZE,
+ type_suffix_index = NUM_TYPE_SUFFIXES);
tree resolve ();
@@ -493,6 +496,7 @@ public:
tree fold_contiguous_base (gimple_seq &, tree);
tree load_store_cookie (tree);
+ gimple *redirect_call (const function_instance &);
gimple *fold_to_pfalse ();
gimple *fold_to_ptrue ();
gimple *fold_to_vl_pred (unsigned int);
@@ -536,7 +540,7 @@ public:
void add_fixed_operand (rtx);
rtx generate_insn (insn_code);
- void prepare_gather_address_operands (unsigned int);
+ void prepare_gather_address_operands (unsigned int, bool = true);
void prepare_prefetch_operands ();
void add_ptrue_hint (unsigned int, machine_mode);
void rotate_inputs_left (unsigned int, unsigned int);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index fcb674f..22eda93 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3381,9 +3381,13 @@
;; - ORR (merging form only)
;; - SMAX
;; - SMIN
+;; - SQADD (SVE2 merging form only)
+;; - SQSUB (SVE2 merging form only)
;; - SUB (merging form only)
;; - UMAX
;; - UMIN
+;; - UQADD (SVE2 merging form only)
+;; - UQSUB (SVE2 merging form only)
;; -------------------------------------------------------------------------
;; Unpredicated integer binary operations that have an immediate form.
@@ -4445,9 +4449,12 @@
;; -------------------------------------------------------------------------
;; Includes:
;; - ASRD
+;; - SQSHLU (SVE2)
+;; - SRSHR (SVE2)
+;; - URSHR (SVE2)
;; -------------------------------------------------------------------------
-;; Unpredicated ASRD.
+;; Unpredicated <SVE_INT_OP>.
(define_expand "sdiv_pow2<mode>3"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
@@ -4464,50 +4471,50 @@
}
)
-;; Predicated ASRD with merging.
-(define_expand "@cond_asrd<mode>"
+;; Predicated right shift with merging.
+(define_expand "@cond_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand")
- (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
- UNSPEC_ASRD)
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
+ SVE_INT_SHIFT_IMM)
(match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE"
)
-;; Predicated ASRD, merging with the first input.
-(define_insn "*cond_asrd<mode>_2"
+;; Predicated right shift, merging with the first input.
+(define_insn "*cond_<sve_int_op><mode>_2"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
- (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
- UNSPEC_ASRD)
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
+ SVE_INT_SHIFT_IMM)
(match_dup 2)]
UNSPEC_SEL))]
"TARGET_SVE"
"@
- asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
- movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
[(set_attr "movprfx" "*,yes")])
-;; Predicated ASRD, merging with zero.
-(define_insn "*cond_asrd<mode>_z"
+;; Predicated right shift, merging with zero.
+(define_insn "*cond_<sve_int_op><mode>_z"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand" "w")
- (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
- UNSPEC_ASRD)
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
+ SVE_INT_SHIFT_IMM)
(match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
UNSPEC_SEL))]
"TARGET_SVE"
- "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
+ "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
[(set_attr "movprfx" "yes")])
;; -------------------------------------------------------------------------
@@ -6835,6 +6842,10 @@
;; ---- [INT] While tests
;; -------------------------------------------------------------------------
;; Includes:
+;; - WHILEGE (SVE2)
+;; - WHILEGT (SVE2)
+;; - WHILEHI (SVE2)
+;; - WHILEHS (SVE2)
;; - WHILELE
;; - WHILELO
;; - WHILELS
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 1b2b6b2..eaded5d 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -21,28 +21,191 @@
;; The file is organised into the following sections (search for the full
;; line):
;;
+;; == Moves
+;; ---- Non-temporal gather loads
+;; ---- Non-temporal scatter stores
+;;
;; == Uniform binary arithmetic
+;; ---- [INT] Multiplication
;; ---- [INT] Scaled high-part multiplication
;; ---- [INT] General binary arithmetic that maps to unspecs
+;; ---- [INT] Saturating binary arithmetic
+;; ---- [INT] Saturating left shifts
;;
;; == Uniform ternary arithmetic
+;; ---- [INT] General ternary arithmetic that maps to unspecs
+;; ---- [INT] Multiply-and-accumulate operations
+;; ---- [INT] Binary logic operations with rotation
;; ---- [INT] Ternary logic operations
;; ---- [INT] Shift-and-accumulate operations
+;; ---- [INT] Shift-and-insert operations
+;; ---- [INT] Sum of absolute differences
;;
;; == Extending arithmetic
+;; ---- [INT] Wide binary arithmetic
;; ---- [INT] Long binary arithmetic
+;; ---- [INT] Long left shifts
+;; ---- [INT] Long binary arithmetic with accumulation
+;; ---- [FP] Long multiplication with accumulation
;;
;; == Narrowing arithmetic
+;; ---- [INT] Narrowing unary arithmetic
+;; ---- [INT] Narrowing binary arithmetic
;; ---- [INT] Narrowing right shifts
;;
+;; == Pairwise arithmetic
+;; ---- [INT] Pairwise arithmetic
+;; ---- [FP] Pairwise arithmetic
+;; ---- [INT] Pairwise arithmetic with accumulation
+;;
+;; == Complex arithmetic
+;; ---- [INT] Complex binary operations
+;; ---- [INT] Complex ternary operations
+;; ---- [INT] Complex dot product
+;;
+;; == Conversions
+;; ---- [FP<-FP] Widening conversions
+;; ---- [FP<-FP] Narrowing conversions
+;;
+;; == Other arithmetic
+;; ---- [INT] Reciprocal approximation
+;; ---- [INT<-FP] Base-2 logarithm
+;; ---- [INT] Polynomial multiplication
+;;
+;; == Permutation
+;; ---- [INT,FP] General permutes
+;; ---- [INT] Optional bit-permute extensions
+;;
;; == General
;; ---- Check for aliases between pointers
+;; ---- Histogram processing
+;; ---- String matching
+;;
+;; == Cryptographic extensions
+;; ---- Optional AES extensions
+;; ---- Optional SHA-3 extensions
+;; ---- Optional SM4 extensions
+
+;; =========================================================================
+;; == Moves
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Non-temporal gather loads
+;; -------------------------------------------------------------------------
+;; Includes gather forms of:
+;; - LDNT1B
+;; - LDNT1D
+;; - LDNT1H
+;; - LDNT1W
+;; -------------------------------------------------------------------------
+
+;; Non-extending loads.
+(define_insn "@aarch64_gather_ldnt<mode>"
+ [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w, w")
+ (unspec:SVE_FULL_SD
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
+ (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")
+ (mem:BLK (scratch))]
+ UNSPEC_LDNT1_GATHER))]
+ "TARGET_SVE2"
+ "@
+ ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>]
+ ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2]"
+)
+
+;; Extending loads.
+(define_insn_and_rewrite "@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<SVE_FULL_SDI:VPRED> 4 "general_operand" "UplDnm, UplDnm")
+ (ANY_EXTEND:SVE_FULL_SDI
+ (unspec:SVE_PARTIAL_I
+ [(match_operand:<SVE_FULL_SDI:VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
+ (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 3 "register_operand" "w, w")
+ (mem:BLK (scratch))]
+ UNSPEC_LDNT1_GATHER))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE2
+ && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+ "@
+ ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>]
+ ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2]"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<SVE_FULL_SDI:VPRED>mode);
+ }
+)
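A minimal sketch of the ACLE-level operations these two gather patterns serve; the intrinsic spellings follow the usual svldnt1 gather naming and are assumptions here:

#include <arm_sve.h>

/* Non-extending non-temporal gather: 64-bit data from a vector of
   addresses.  */
svuint64_t
gather_u64 (svbool_t pg, svuint64_t bases)
{
  return svldnt1_gather_u64base_u64 (pg, bases);
}

/* Extending form: gather 16-bit elements and sign-extend them to
   64 bits, matching the ANY_EXTEND pattern above.  */
svint64_t
gather_s16_to_s64 (svbool_t pg, svuint64_t bases)
{
  return svldnt1sh_gather_u64base_s64 (pg, bases);
}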
+
+;; -------------------------------------------------------------------------
+;; ---- Non-temporal scatter stores
+;; -------------------------------------------------------------------------
+;; Includes scatter forms of:
+;; - STNT1B
+;; - STNT1D
+;; - STNT1H
+;; - STNT1W
+;; -------------------------------------------------------------------------
+
+;; Non-truncating stores.
+(define_insn "@aarch64_scatter_stnt<mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand:<VPRED> 0 "register_operand" "Upl, Upl")
+ (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_SD 3 "register_operand" "w, w")]
+
+ UNSPEC_STNT1_SCATTER))]
+ "TARGET_SVE"
+ "@
+ stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>]
+ stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>, %1]"
+)
+
+;; Truncating stores.
+(define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand:<SVE_FULL_SDI:VPRED> 0 "register_operand" "Upl, Upl")
+ (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
+ (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 2 "register_operand" "w, w")
+ (truncate:SVE_PARTIAL_I
+ (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))]
+ UNSPEC_STNT1_SCATTER))]
+ "TARGET_SVE2
+ && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+ "@
+ stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>]
+ stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>, %1]"
+)
;; =========================================================================
;; == Uniform binary arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
+;; ---- [INT] Multiplication
+;; -------------------------------------------------------------------------
+;; Includes the lane forms of:
+;; - MUL
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_mul_lane_<mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
+ (mult:SVE_FULL_HSDI
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))]
+ "TARGET_SVE2"
+ "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
+)
+
+;; -------------------------------------------------------------------------
;; ---- [INT] Scaled high-part multiplication
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
@@ -64,12 +227,15 @@
rtx prod_b = gen_reg_rtx (<VWIDE>mode);
rtx prod_t = gen_reg_rtx (<VWIDE>mode);
- emit_insn (gen_<su>mullb<Vwide> (prod_b, operands[1], operands[2]));
- emit_insn (gen_<su>mullt<Vwide> (prod_t, operands[1], operands[2]));
+ emit_insn (gen_aarch64_sve_<su>mullb<Vwide> (prod_b, operands[1],
+ operands[2]));
+ emit_insn (gen_aarch64_sve_<su>mullt<Vwide> (prod_t, operands[1],
+ operands[2]));
rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
- emit_insn (gen_<r>shrnb<mode> (operands[0], prod_b, shift));
- emit_insn (gen_<r>shrnt<mode> (operands[0], operands[0], prod_t, shift));
+ emit_insn (gen_aarch64_sve_<r>shrnb<Vwide> (operands[0], prod_b, shift));
+ emit_insn (gen_aarch64_sve_<r>shrnt<Vwide> (operands[0], operands[0],
+ prod_t, shift));
DONE;
}
@@ -81,10 +247,22 @@
;; Includes:
;; - SHADD
;; - SHSUB
+;; - SHSUBR
+;; - SQRSHL
+;; - SQRSHLR
;; - SRHADD
+;; - SRSHL
+;; - SRSHLR
+;; - SUQADD
;; - UHADD
;; - UHSUB
+;; - UHSUBR
+;; - UQRSHL
+;; - UQRSHLR
;; - URHADD
+;; - URSHL
+;; - URSHLR
+;; - USQADD
;; -------------------------------------------------------------------------
;; Integer average (floor).
@@ -119,31 +297,485 @@
}
)
-;; Predicated halving addsub.
-(define_insn "*<sur>h<addsub><mode>"
+;; The immediate form of SQADD acts as an immediate form of SUQADD
+;; over its full range. In contrast to the ss_plus pattern, we do
+;; not need to treat byte immediates specially. E.g.:
+;;
+;; SQADD Z0.B, Z0.B, #128
+;;
+;; is equivalent to:
+;;
+;; MOV Z1.B, #128
+;; SUQADD Z0.B, P0/M, Z0.B, Z1.B
+;;
+;; even though it's not equivalent to:
+;;
+;; MOV Z1.B, #128
+;; SQADD Z0.B, P0/M, Z0.B, Z1.B // Saturating subtraction of 128
+(define_insn "@aarch64_sve_suqadd<mode>_const"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
+ (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")]
+ UNSPEC_SUQADD))]
+ "TARGET_SVE2"
+ "@
+ sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2
+ movprfx\t%0, %1\;sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2"
+ [(set_attr "movprfx" "*,yes")]
+)
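+
+;; To make the equivalence above concrete, here is a rough single-lane
+;; C model (illustrative only; assumes <stdint.h> and <limits.h>, and
+;; the helper names are made up):
+;;
+;;   static int8_t sat_s8 (int v)
+;;   { return v > INT8_MAX ? INT8_MAX : v < INT8_MIN ? INT8_MIN : v; }
+;;
+;;   /* SUQADD: signed saturating add of an *unsigned* addend.  */
+;;   static int8_t suqadd_b (int8_t x, uint8_t u) { return sat_s8 (x + u); }
+;;
+;;   /* SQADD (immediate): the immediate is likewise unsigned, so
+;;      sqadd_imm_b (x, 128) == suqadd_b (x, 128), whereas the register
+;;      form of SQADD would treat 0x80 as -128.  */
+;;   static int8_t sqadd_imm_b (int8_t x, uint8_t imm)
+;;   { return sat_s8 (x + imm); }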
+
+;; General predicated binary arithmetic. All operations handled here
+;; are commutative or have a reversed form.
+(define_insn "@aarch64_pred_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w")]
+ SVE2_COND_INT_BINARY_REV)]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
+)
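+
+;; A "reversed" form is the R-suffixed variant that swaps the two data
+;; operands, which is what lets either input be tied to the destination
+;; above.  Rough single-lane C sketch using SHSUB/SHSUBR (illustrative
+;; only, assuming an arithmetic right shift for signed >>):
+;;
+;;   /* Signed halving subtract, without intermediate overflow.  */
+;;   static int8_t shsub_b (int8_t a, int8_t b) { return (a - b) >> 1; }
+;;
+;;   /* SHSUBR just swaps the operands: shsubr_b (a, b) == shsub_b (b, a).  */
+;;   static int8_t shsubr_b (int8_t a, int8_t b) { return (b - a) >> 1; }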
+
+;; Predicated binary arithmetic with merging.
+(define_expand "@cond_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_dup 5)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand")
+ (match_operand:SVE_FULL_I 3 "register_operand")]
+ SVE2_COND_INT_BINARY)]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ {
+ operands[5] = CONSTM1_RTX (<MODE>mode);
+ }
+)
+
+;; Predicated binary arithmetic, merging with the first input.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand 4)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
+ SVE2_COND_INT_BINARY)]
+ UNSPEC_PRED_X)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated binary arithmetic, merging with the second input.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_I
- [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
- (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
- HADDSUB)]
+ [(match_operand 4)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
+ SVE2_COND_INT_BINARY_REV)]
+ UNSPEC_PRED_X)
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated binary operations, merging with an independent value.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand 5)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")]
+ SVE2_COND_INT_BINARY_REV)]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2
+ && !rtx_equal_p (operands[2], operands[4])
+ && !rtx_equal_p (operands[3], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& 1"
+ {
+ if (reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4]))
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ else if (!CONSTANT_P (operands[5]))
+ operands[5] = CONSTM1_RTX (<VPRED>mode);
+ else
+ FAIL;
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
+;; Predicated binary operations with no reverse form, merging with zero.
+;; At present we don't generate these patterns via a cond_* optab,
+;; so there's no correctness requirement to handle merging with an
+;; independent value.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand 5)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
+ SVE2_COND_INT_BINARY_NOREV)]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ "&& !CONSTANT_P (operands[5])"
+ {
+ operands[5] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Saturating binary arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SQDMULH
+;; - SQRDMULH
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand" "w")
+ (match_operand:SVE_FULL_I 2 "register_operand" "w")]
+ SVE2_INT_BINARY))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+)
+
+(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_BINARY_LANE))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Saturating left shifts
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SQSHL
+;; - SQSHLR
+;; - UQSHL
+;; - UQSHLR
+;; -------------------------------------------------------------------------
+
+;; Predicated left shifts.
+(define_insn "@aarch64_pred_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w")
+ (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w")]
+ SVE2_COND_INT_SHIFT)]
UNSPEC_PRED_X))]
"TARGET_SVE2"
"@
- <sur>h<addsub>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<sur>h<addsub>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,*,*,yes,yes")]
+)
+
+;; Predicated left shifts with merging.
+(define_expand "@cond_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_dup 5)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand")
+ (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
+ SVE2_COND_INT_SHIFT)]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 4 "register_operand")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ {
+ operands[5] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
+;; Predicated left shifts, merging with the first input.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand 4)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w")
+ (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, D<lr>, w")]
+ SVE2_COND_INT_SHIFT)]
+ UNSPEC_PRED_X)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,*,yes,yes")]
+)
+
+;; Predicated left shifts, merging with the second input.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand 4)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
+ SVE2_COND_INT_SHIFT)]
+ UNSPEC_PRED_X)
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
[(set_attr "movprfx" "*,yes")]
)
+;; Predicated left shifts, merging with an independent value.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, &w, &w, ?&w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_I
+ [(match_operand 5)
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w, w, w, w, w")
+ (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w, D<lr>, w, D<lr>, w")]
+ SVE2_COND_INT_SHIFT)]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, Dz, 0, 0, w, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2
+ && !rtx_equal_p (operands[2], operands[4])
+ && (CONSTANT_P (operands[4]) || !rtx_equal_p (operands[3], operands[4]))"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #
+ #"
+ "&& 1"
+ {
+ if (reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4]))
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ else if (!CONSTANT_P (operands[5]))
+ operands[5] = CONSTM1_RTX (<VPRED>mode);
+ else
+ FAIL;
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
;; =========================================================================
;; == Uniform ternary arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
+;; ---- [INT] General ternary arithmetic that maps to unspecs
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADCLB
+;; - ADCLT
+;; - EORBT
+;; - EORTB
+;; - SBCLB
+;; - SBCLT
+;; - SQRDMLAH
+;; - SQRDMLSH
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w")]
+ SVE2_INT_TERNARY))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")]
+ SVE2_INT_TERNARY_LANE))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multiply-and-accumulate operations
+;; -------------------------------------------------------------------------
+;; Includes the lane forms of:
+;; - MLA
+;; - MLS
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_add_mul_lane_<mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (plus:SVE_FULL_HSDI
+ (mult:SVE_FULL_HSDI
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
+ movprfx\t%0, %1\;mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+(define_insn "@aarch64_sve_sub_mul_lane_<mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (minus:SVE_FULL_HSDI
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
+ (mult:SVE_FULL_HSDI
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))))]
+ "TARGET_SVE2"
+ "@
+ mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
+ movprfx\t%0, %1\;mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Binary logic operations with rotation
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - XAR
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve2_xar<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (rotatert:SVE_FULL_I
+ (xor:SVE_FULL_I
+ (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
+ "TARGET_SVE2"
+ "@
+ xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
+ movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
+ [(set_attr "movprfx" "*,yes")]
+)
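+
+;; XAR is a fused EOR followed by a rotate right by an immediate, which
+;; is exactly what the rotatert-of-xor RTL above expresses.  Rough
+;; single-lane C sketch for .S elements (illustrative only, assuming
+;; <stdint.h> and 0 < imm < 32 so that the shifts are well defined):
+;;
+;;   static uint32_t xar_s (uint32_t a, uint32_t b, unsigned imm)
+;;   {
+;;     uint32_t t = a ^ b;
+;;     return (t >> imm) | (t << (32 - imm));   /* rotate right by imm */
+;;   }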
+
+;; -------------------------------------------------------------------------
;; ---- [INT] Ternary logic operations
;; -------------------------------------------------------------------------
;; Includes:
+;; - BCAX
;; - BSL
;; - BSL1N
;; - BSL2N
@@ -151,8 +783,23 @@
;; - NBSL
;; -------------------------------------------------------------------------
+;; Unpredicated exclusive OR of AND.
+(define_insn "@aarch64_sve2_bcax<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (xor:SVE_FULL_I
+ (and:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ bcax\t%0.d, %0.d, %2.d, %3.d
+ movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Unpredicated 3-way exclusive OR.
-(define_insn "*aarch64_sve2_eor3<mode>"
+(define_insn "@aarch64_sve2_eor3<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
(xor:SVE_FULL_I
(xor:SVE_FULL_I
@@ -214,6 +861,18 @@
;; Unpredicated bitwise select.
;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
+(define_expand "@aarch64_sve2_bsl<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (xor:SVE_FULL_I
+ (and:SVE_FULL_I
+ (xor:SVE_FULL_I
+ (match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 2 "register_operand"))
+ (match_operand:SVE_FULL_I 3 "register_operand"))
+ (match_dup 2)))]
+ "TARGET_SVE2"
+)
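+
+;; To see why the identity quoted above holds: where a bit of op3 is set,
+;; the AND keeps (bsl_mov ^ bsl_dup) and XORing with bsl_dup recovers
+;; bsl_mov; where it is clear, the XOR just returns bsl_dup.  The same
+;; reasoning carries over, with the obvious inversions, to NBSL, BSL1N
+;; and BSL2N below.  Illustrative C check over one 64-bit word (assumes
+;; <stdint.h> and <assert.h>):
+;;
+;;   static uint64_t bsl64 (uint64_t mov, uint64_t dup, uint64_t sel)
+;;   {
+;;     uint64_t via_identity = ((mov ^ dup) & sel) ^ dup;
+;;     uint64_t via_select = (mov & sel) | (dup & ~sel);
+;;     assert (via_identity == via_select);
+;;     return via_identity;
+;;   }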
+
(define_insn "*aarch64_sve2_bsl<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(xor:SVE_FULL_I
@@ -232,6 +891,25 @@
;; Unpredicated bitwise inverted select.
;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
+(define_expand "@aarch64_sve2_nbsl<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_dup 4)
+ (not:SVE_FULL_I
+ (xor:SVE_FULL_I
+ (and:SVE_FULL_I
+ (xor:SVE_FULL_I
+ (match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 2 "register_operand"))
+ (match_operand:SVE_FULL_I 3 "register_operand"))
+ (match_dup 2)))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE2"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
@@ -258,6 +936,25 @@
;; Unpredicated bitwise select with inverted first operand.
;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
+(define_expand "@aarch64_sve2_bsl1n<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (xor:SVE_FULL_I
+ (and:SVE_FULL_I
+ (unspec:SVE_FULL_I
+ [(match_dup 4)
+ (not:SVE_FULL_I
+ (xor:SVE_FULL_I
+ (match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 2 "register_operand")))]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 3 "register_operand"))
+ (match_dup 2)))]
+ "TARGET_SVE2"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(xor:SVE_FULL_I
@@ -284,6 +981,26 @@
;; Unpredicated bitwise select with inverted second operand.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
+(define_expand "@aarch64_sve2_bsl2n<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (ior:SVE_FULL_I
+ (and:SVE_FULL_I
+ (match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 3 "register_operand"))
+ (unspec:SVE_FULL_I
+ [(match_dup 4)
+ (and:SVE_FULL_I
+ (not:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand"))
+ (not:SVE_FULL_I
+ (match_dup 3)))]
+ UNSPEC_PRED_X)))]
+ "TARGET_SVE2"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(ior:SVE_FULL_I
@@ -340,27 +1057,144 @@
;; ---- [INT] Shift-and-accumulate operations
;; -------------------------------------------------------------------------
;; Includes:
+;; - SRSRA
;; - SSRA
+;; - URSRA
;; - USRA
;; -------------------------------------------------------------------------
-;; Unpredicated signed / unsigned shift-right accumulate.
+;; Provide the natural unpredicated interface for SSRA and USRA.
+(define_expand "@aarch64_sve_add_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (plus:SVE_FULL_I
+ (unspec:SVE_FULL_I
+ [(match_dup 4)
+ (SHIFTRT:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand")
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 1 "register_operand")))]
+ "TARGET_SVE2"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
+;; Pattern-match SSRA and USRA as a predicated operation whose predicate
+;; isn't needed.
(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
- [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(plus:SVE_FULL_I
(unspec:SVE_FULL_I
[(match_operand 4)
(SHIFTRT:SVE_FULL_I
- (match_operand:SVE_FULL_I 2 "register_operand" "w")
- (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm" "Dr"))]
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
UNSPEC_PRED_X)
- (match_operand:SVE_FULL_I 1 "register_operand" "0")))]
+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
"TARGET_SVE2"
- "<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
+ "@
+ <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
+ movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; SRSRA and URSRA.
+(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (plus:SVE_FULL_I
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
+ VRSHR_N)
+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ <sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3
+ movprfx\t%0, %1\;<sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
+ [(set_attr "movprfx" "*,yes")]
+)
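+
+;; Rough single-lane C sketch of the two accumulate forms above
+;; (illustrative only, assuming <stdint.h>, an arithmetic right shift
+;; for signed >>, 0 < shift < 32, and ignoring wrap-around of the
+;; accumulation):
+;;
+;;   /* SSRA: accumulate a plain arithmetic shift right.  */
+;;   static int32_t ssra_s (int32_t acc, int32_t x, unsigned shift)
+;;   { return acc + (x >> shift); }
+;;
+;;   /* SRSRA: as above, but round to nearest by adding 1 << (shift - 1)
+;;      before shifting.  */
+;;   static int32_t srsra_s (int32_t acc, int32_t x, unsigned shift)
+;;   { return acc + (int32_t) (((int64_t) x + (1ll << (shift - 1))) >> shift); }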
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Shift-and-insert operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SLI
+;; - SRI
+;; -------------------------------------------------------------------------
+
+;; These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand" "0")
+ (match_operand:SVE_FULL_I 2 "register_operand" "w")
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
+ SVE2_INT_SHIFT_INSERT))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, #%3"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Sum of absolute differences
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SABA
+;; - UABA
+;; -------------------------------------------------------------------------
+
+;; Provide the natural unpredicated interface for SABA and UABA.
+(define_expand "@aarch64_sve2_<su>aba<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (plus:SVE_FULL_I
+ (minus:SVE_FULL_I
+ (unspec:SVE_FULL_I
+ [(match_dup 4)
+ (USMAX:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
+ UNSPEC_PRED_X)
+ (unspec:SVE_FULL_I
+ [(match_dup 4)
+ (<max_opp>:SVE_FULL_I
+ (match_dup 2)
+ (match_dup 3))]
+ UNSPEC_PRED_X))
+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
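+
+;; The expander above forms the absolute difference as max (x, y) -
+;; min (x, y), which never overflows the element type.  Rough
+;; single-lane C sketch (illustrative only, assuming <stdint.h>):
+;;
+;;   static uint8_t uaba_b (uint8_t acc, uint8_t x, uint8_t y)
+;;   {
+;;     uint8_t max = x > y ? x : y;
+;;     uint8_t min = x > y ? y : x;
+;;     return acc + (max - min);   /* accumulate |x - y|, wrapping */
+;;   }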
+
+;; Pattern-match SABA and UABA as an absolute-difference-and-accumulate
+;; operation whose predicates aren't needed.
+(define_insn "*aarch64_sve2_<su>aba<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (plus:SVE_FULL_I
+ (minus:SVE_FULL_I
+ (unspec:SVE_FULL_I
+ [(match_operand 4)
+ (USMAX:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
+ UNSPEC_PRED_X)
+ (unspec:SVE_FULL_I
+ [(match_operand 5)
+ (<max_opp>:SVE_FULL_I
+ (match_dup 2)
+ (match_dup 3))]
+ UNSPEC_PRED_X))
+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ <su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %1\;<su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
)
;; =========================================================================
@@ -368,24 +1202,302 @@
;; =========================================================================
;; -------------------------------------------------------------------------
+;; ---- [INT] Wide binary arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SADDWB
+;; - SADDWT
+;; - SSUBWB
+;; - SSUBWT
+;; - UADDWB
+;; - UADDWT
+;; - USUBWB
+;; - USUBWT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
+ (match_operand:<VNARROW> 2 "register_operand" "w")]
+ SVE2_INT_BINARY_WIDE))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Ventype>"
+)
+
+;; -------------------------------------------------------------------------
;; ---- [INT] Long binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
+;; - SABDLB
+;; - SABDLT
+;; - SADDLB
+;; - SADDLBT
+;; - SADDLT
;; - SMULLB
;; - SMULLT
+;; - SQDMULLB
+;; - SQDMULLT
+;; - SSUBLB
+;; - SSUBLBT
+;; - SSUBLT
+;; - SSUBLTB
+;; - UABDLB
+;; - UABDLT
+;; - UADDLB
+;; - UADDLT
;; - UMULLB
;; - UMULLT
+;; - USUBLB
+;; - USUBLT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VNARROW> 1 "register_operand" "w")
+ (match_operand:<VNARROW> 2 "register_operand" "w")]
+ SVE2_INT_BINARY_LONG))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
+)
+
+(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<VNARROW> 1 "register_operand" "w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_BINARY_LONG_LANE))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Long left shifts
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SSHLLB
+;; - SSHLLT
+;; - USHLLB
+;; - USHLLT
+;; -------------------------------------------------------------------------
+
+;; The immediate range is enforced before generating the instruction.
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VNARROW> 1 "register_operand" "w")
+ (match_operand:DI 2 "const_int_operand")]
+ SVE2_INT_SHIFT_IMM_LONG))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, #%2"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Long binary arithmetic with accumulation
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SABALB
+;; - SABALT
+;; - SMLALB
+;; - SMLALT
+;; - SMLSLB
+;; - SMLSLT
+;; - SQDMLALB
+;; - SQDMLALBT
+;; - SQDMLALT
+;; - SQDMLSLB
+;; - SQDMLSLBT
+;; - SQDMLSLT
+;; - UABALB
+;; - UABALT
+;; - UMLALB
+;; - UMLALT
+;; - UMLSLB
+;; - UMLSLT
+;; -------------------------------------------------------------------------
+
+;; Non-saturating MLA operations.
+(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (plus:SVE_FULL_HSDI
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (match_operand:<VNARROW> 3 "register_operand" "w, w")]
+ SVE2_INT_ADD_BINARY_LONG)
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
+ movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Non-saturating MLA operations with lane select.
+(define_insn "@aarch64_sve_add_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
+ (plus:SVE_FULL_SDI
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_ADD_BINARY_LONG_LANE)
+ (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
+ movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Saturating MLA operations.
+(define_insn "@aarch64_sve_qadd_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (ss_plus:SVE_FULL_HSDI
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (match_operand:<VNARROW> 3 "register_operand" "w, w")]
+ SVE2_INT_QADD_BINARY_LONG)
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
+ movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Saturating MLA operations with lane select.
+(define_insn "@aarch64_sve_qadd_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
+ (ss_plus:SVE_FULL_SDI
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_QADD_BINARY_LONG_LANE)
+ (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
+ movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Non-saturating MLS operations.
+(define_insn "@aarch64_sve_sub_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (minus:SVE_FULL_HSDI
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (match_operand:<VNARROW> 3 "register_operand" "w, w")]
+ SVE2_INT_SUB_BINARY_LONG)))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
+ movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Non-saturating MLS operations with lane select.
+(define_insn "@aarch64_sve_sub_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
+ (minus:SVE_FULL_SDI
+ (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_SUB_BINARY_LONG_LANE)))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
+ movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Saturating MLS operations.
+(define_insn "@aarch64_sve_qsub_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (ss_minus:SVE_FULL_HSDI
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (match_operand:<VNARROW> 3 "register_operand" "w, w")]
+ SVE2_INT_QSUB_BINARY_LONG)))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
+ movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Saturating MLS operations with lane select.
+(define_insn "@aarch64_sve_qsub_<sve_int_op>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
+ (ss_minus:SVE_FULL_SDI
+ (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_QSUB_BINARY_LONG_LANE)))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
+ movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
;; -------------------------------------------------------------------------
+;; ---- [FP] Long multiplication with accumulation
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMLALB
+;; - FMLALT
+;; - FMLSLB
+;; - FMLSLT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_fp_op><mode>"
+ [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
+ (unspec:VNx4SF_ONLY
+ [(match_operand:<VNARROW> 1 "register_operand" "w, w")
+ (match_operand:<VNARROW> 2 "register_operand" "w, w")
+ (match_operand:VNx4SF_ONLY 3 "register_operand" "0, w")]
+ SVE2_FP_TERNARY_LONG))]
+ "TARGET_SVE2"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>
+ movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
+ [(set_attr "movprfx" "*,yes")]
+)
-;; Multiply long top / bottom.
-(define_insn "<su>mull<bt><Vwide>"
- [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (unspec:<VWIDE>
- [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
- (match_operand:SVE_FULL_BHSI 2 "register_operand" "w")]
- MULLBT))]
+(define_insn "@aarch64_<sve_fp_op>_lane_<mode>"
+ [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
+ (unspec:VNx4SF_ONLY
+ [(match_operand:<VNARROW> 1 "register_operand" "w, w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)
+ (match_operand:VNx4SF_ONLY 4 "register_operand" "0, w")]
+ SVE2_FP_TERNARY_LONG_LANE))]
"TARGET_SVE2"
- "<su>mull<bt>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]
+ movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
+ [(set_attr "movprfx" "*,yes")]
)
;; =========================================================================
@@ -393,6 +1505,74 @@
;; =========================================================================
;; -------------------------------------------------------------------------
+;; ---- [INT] Narrowing unary arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SQXTNB
+;; - SQXTNT
+;; - SQXTUNB
+;; - SQXTUNT
+;; - UQXTNB
+;; - UQXTNT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")]
+ SVE2_INT_UNARY_NARROWB))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>"
+)
+
+;; These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 1 "register_operand" "0")
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
+ SVE2_INT_UNARY_NARROWT))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Narrowing binary arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADDHNB
+;; - ADDHNT
+;; - RADDHNB
+;; - RADDHNT
+;; - RSUBHNB
+;; - RSUBHNT
+;; - SUBHNB
+;; - SUBHNT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
+ SVE2_INT_BINARY_NARROWB))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
+)
+
+;; These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 1 "register_operand" "0")
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
+ (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")]
+ SVE2_INT_BINARY_NARROWT))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
+)
+
+;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing right shifts
;; -------------------------------------------------------------------------
;; Includes:
@@ -400,29 +1580,653 @@
;; - RSHRNT
;; - SHRNB
;; - SHRNT
+;; - SQRSHRNB
+;; - SQRSHRNT
+;; - SQRSHRUNB
+;; - SQRSHRUNT
+;; - SQSHRNB
+;; - SQSHRNT
+;; - SQSHRUNB
+;; - SQSHRUNT
+;; - UQRSHRNB
+;; - UQRSHRNT
+;; - UQSHRNB
+;; - UQSHRNT
;; -------------------------------------------------------------------------
-;; (Rounding) Right shift narrow bottom.
-(define_insn "<r>shrnb<mode>"
- [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
- (unspec:SVE_FULL_BHSI
- [(match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand 2 "aarch64_simd_shift_imm_offset_<Vel>" "")]
- SHRNB))]
+;; The immediate range is enforced before generating the instruction.
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
+ (match_operand:DI 2 "const_int_operand")]
+ SVE2_INT_SHIFT_IMM_NARROWB))]
"TARGET_SVE2"
- "<r>shrnb\t%0.<Vetype>, %1.<Vewtype>, #%2"
+ "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, #%2"
)
-;; (Rounding) Right shift narrow top.
-(define_insn "<r>shrnt<mode>"
- [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
- (unspec:SVE_FULL_BHSI
- [(match_operand:SVE_FULL_BHSI 1 "register_operand" "0")
- (match_operand:<VWIDE> 2 "register_operand" "w")
- (match_operand 3 "aarch64_simd_shift_imm_offset_<Vel>" "i")]
- SHRNT))]
+;; The immediate range is enforced before generating the instruction.
+;; These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:<VNARROW> 1 "register_operand" "0")
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
+ (match_operand:DI 3 "const_int_operand")]
+ SVE2_INT_SHIFT_IMM_NARROWT))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3"
+)
+
+;; =========================================================================
+;; == Pairwise arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Pairwise arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADDP
+;; - SMAXP
+;; - SMINP
+;; - UMAXP
+;; - UMINP
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_pred_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
+ SVE2_INT_BINARY_PAIR))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Pairwise arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADDP
+;; - FMAXP
+;; - FMAXNMP
+;; - FMINP
+;; - FMINNMP
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_pred_<sve_fp_op><mode>"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
+ SVE2_FP_BINARY_PAIR))]
+ "TARGET_SVE2"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Pairwise arithmetic with accumulation
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SADALP
+;; - UADALP
+;; -------------------------------------------------------------------------
+
+;; Predicated pairwise add-and-accumulate-long with merging.
+(define_expand "@cond_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_FULL_HSDI
+ [(match_dup 1)
+ (match_operand:SVE_FULL_HSDI 2 "register_operand")
+ (match_operand:<VNARROW> 3 "register_operand")]
+ SVE2_INT_BINARY_PAIR_LONG)
+ (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+{
+ /* Only target code is aware of these operations, so we don't need
+ to handle the fully-general case. */
+ gcc_assert (rtx_equal_p (operands[2], operands[4])
+ || CONSTANT_P (operands[4]));
+})
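+
+;; Rough C sketch of one destination element of SADALP/UADALP
+;; (illustrative only, assuming <stdint.h> and ignoring wrap-around):
+;; each destination element accumulates the widened sum of the two
+;; adjacent narrower source elements underneath it.
+;;
+;;   static int16_t sadalp_pair (int16_t acc, int8_t even, int8_t odd)
+;;   { return acc + (int16_t) (even + odd); }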
+
+;; Predicated pairwise add-and-accumulate-long, merging with the
+;; first input.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand 4)
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
+ (match_operand:<VNARROW> 3 "register_operand" "w, w")]
+ SVE2_INT_BINARY_PAIR_LONG)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated pairwise add-and-accumulate-long, merging with zero.
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand 5)
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
+ (match_operand:<VNARROW> 3 "register_operand" "w, w")]
+ SVE2_INT_BINARY_PAIR_LONG)
+ (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_imm_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
+ "&& !CONSTANT_P (operands[5])"
+ {
+ operands[5] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
+;; =========================================================================
+;; == Complex arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Complex binary operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - CADD
+;; - SQCADD
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w")]
+ SVE2_INT_CADD))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Complex ternary operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - CMLA
+;; - SQRDCMLA
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
+ SVE2_INT_CMLA))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+(define_insn "@aarch64_<optab>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_HSI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_HSI
+ [(match_operand:SVE_FULL_HSI 1 "register_operand" "0, w")
+ (match_operand:SVE_FULL_HSI 2 "register_operand" "w, w")
+ (unspec:SVE_FULL_HSI
+ [(match_operand:SVE_FULL_HSI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_CMLA))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Complex dot product
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - CDOT
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
+ (match_operand:<VSI2QI> 2 "register_operand" "w, w")
+ (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
+ SVE2_INT_CDOT))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+(define_insn "@aarch64_<optab>_lane_<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
+ (match_operand:<VSI2QI> 2 "register_operand" "w, w")
+ (unspec:<VSI2QI>
+ [(match_operand:<VSI2QI> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ SVE2_INT_CDOT))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>
+ movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; =========================================================================
+;; == Conversions
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Widening conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVTLT
+;; -------------------------------------------------------------------------
+
+;; Predicated convert long top.
+(define_insn "@aarch64_pred_<sve_fp_op><mode>"
+ [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
+ (unspec:SVE_FULL_SDF
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:<VNARROW> 2 "register_operand" "w")]
+ SVE2_COND_FP_UNARY_LONG))]
+ "TARGET_SVE2"
+ "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
+)
+
+;; Predicated convert long top with merging.
+(define_expand "@cond_<sve_fp_op><mode>"
+ [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
+ (unspec:SVE_FULL_SDF
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_FULL_SDF
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:<VNARROW> 2 "register_operand")]
+ SVE2_COND_FP_UNARY_LONG)
+ (match_operand:SVE_FULL_SDF 3 "register_operand")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+)
+
+;; These instructions do not take MOVPRFX.
+(define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
+ [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
+ (unspec:SVE_FULL_SDF
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (unspec:SVE_FULL_SDF
+ [(match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (match_operand:<VNARROW> 2 "register_operand" "w")]
+ SVE2_COND_FP_UNARY_LONG)
+ (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Narrowing conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVTNT
+;; - FCVTX
+;; - FCVTXNT
+;; -------------------------------------------------------------------------
+
+;; Predicated FCVTNT. This doesn't give a natural aarch64_pred_*/cond_*
+;; pair because the even elements always have to be supplied for active
+;; elements, even if the inactive elements don't matter.
+;;
+;; These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve2_cvtnt<mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (const_int SVE_STRICT_GP)
+ (match_operand:<VNARROW> 1 "register_operand" "0")
+ (match_operand:SVE_FULL_SDF 3 "register_operand" "w")]
+ UNSPEC_COND_FCVTNT))]
+ "TARGET_SVE2"
+ "fcvtnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
+)
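+
+;; Rough C sketch of the even/odd behaviour described above
+;; (illustrative only): each active source element is narrowed into the
+;; odd-numbered ("top") destination element of its pair, while the
+;; even-numbered elements keep whatever operand 1 held, which is why
+;; that operand is tied to the output.
+;;
+;;   void fcvtnt_sketch (float *dst, const double *src, const int *pg,
+;;                       int npairs)
+;;   {
+;;     for (int i = 0; i < npairs; ++i)
+;;       if (pg[i])
+;;         dst[2 * i + 1] = (float) src[i];
+;;   }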
+
+;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
+;; it supports MOVPRFX).
+(define_insn "@aarch64_pred_<sve_fp_op><mode>"
+ [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w")
+ (unspec:VNx4SF_ONLY
+ [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:<VWIDE> 2 "register_operand" "w")]
+ SVE2_COND_FP_UNARY_NARROWB))]
+ "TARGET_SVE2"
+ "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
+)
+
+;; Predicated FCVTX with merging.
+(define_expand "@cond_<sve_fp_op><mode>"
+ [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
+ (unspec:VNx4SF_ONLY
+ [(match_operand:<VWIDE_PRED> 1 "register_operand")
+ (unspec:VNx4SF_ONLY
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:<VWIDE> 2 "register_operand")]
+ SVE2_COND_FP_UNARY_NARROWB)
+ (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+)
+
+(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any"
+ [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
+ (unspec:VNx4SF_ONLY
+ [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:VNx4SF_ONLY
+ [(match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
+ SVE2_COND_FP_UNARY_NARROWB)
+ (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2
+ && !rtx_equal_p (operands[2], operands[3])
+ && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
+ movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
+ movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+ [(set_attr "movprfx" "*,yes,yes")]
+)
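+
+;; A usage sketch, assuming the ACLE spelling svcvtx_f32_f64_z (see
+;; aarch64-sve-builtins-sve2.def).  The _z form corresponds to the "Dz"
+;; alternative above, so a MOVPRFX is expected before the FCVTX:
+;;
+;;   #include <arm_sve.h>
+;;   svfloat32_t
+;;   round_to_odd (svbool_t pg, svfloat64_t x)
+;;   {
+;;     return svcvtx_f32_f64_z (pg, x);   /* MOVPRFX ...; FCVTX .s <- .d */
+;;   }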
+
+;; Predicated FCVTXNT. This doesn't give a natural aarch64_pred_*/cond_*
+;; pair because the even elements always have to be supplied for active
+;; elements, even if the inactive elements don't matter.
+;;
+;; These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve2_cvtxnt<mode>"
+ [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+ (unspec:<VNARROW>
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (const_int SVE_STRICT_GP)
+ (match_operand:<VNARROW> 1 "register_operand" "0")
+ (match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
+ UNSPEC_COND_FCVTXNT))]
+ "TARGET_SVE2"
+ "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
+)
+
+;; =========================================================================
+;; == Other arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Reciprocal approximation
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - URECPE
+;; - URSQRTE
+;; -------------------------------------------------------------------------
+
+;; Predicated integer unary operations.
+(define_insn "@aarch64_pred_<sve_int_op><mode>"
+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w")
+ (unspec:VNx4SI_ONLY
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (unspec:VNx4SI_ONLY
+ [(match_operand:VNx4SI_ONLY 2 "register_operand" "w")]
+ SVE2_U32_UNARY)]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+)
+
+;; Predicated integer unary operations with merging.
+(define_expand "@cond_<sve_int_op><mode>"
+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
+ (unspec:VNx4SI_ONLY
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:VNx4SI_ONLY
+ [(match_dup 4)
+ (unspec:VNx4SI_ONLY
+ [(match_operand:VNx4SI_ONLY 2 "register_operand")]
+ SVE2_U32_UNARY)]
+ UNSPEC_PRED_X)
+ (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ {
+ operands[4] = CONSTM1_RTX (<MODE>mode);
+ }
+)
+
+(define_insn_and_rewrite "*cond_<sve_int_op><mode>"
+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w, ?&w")
+ (unspec:VNx4SI_ONLY
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:VNx4SI_ONLY
+ [(match_operand 4)
+ (unspec:VNx4SI_ONLY
+ [(match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
+ SVE2_U32_UNARY)]
+ UNSPEC_PRED_X)
+ (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+ [(set_attr "movprfx" "*,yes,yes")]
+)
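+
+;; A usage sketch, assuming the ACLE spellings svrecpe_u32_z and
+;; svrsqrte_u32_z (see aarch64-sve-builtins-sve2.def):
+;;
+;;   #include <arm_sve.h>
+;;   svuint32_t
+;;   rsqrt_estimate (svbool_t pg, svuint32_t x)
+;;   {
+;;     return svrsqrte_u32_z (pg, x);   /* MOVPRFX ...; URSQRTE .s */
+;;   }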
+
+;; -------------------------------------------------------------------------
+;; ---- [INT<-FP] Base-2 logarithm
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FLOGB
+;; -------------------------------------------------------------------------
+
+;; Predicated FLOGB.
+(define_insn "@aarch64_pred_<sve_fp_op><mode>"
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
+ (unspec:<V_INT_EQUIV>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+ SVE2_COND_INT_UNARY_FP))]
+ "TARGET_SVE2"
+ "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+)
+
+;; Predicated FLOGB with merging.
+(define_expand "@cond_<sve_fp_op><mode>"
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
+ (unspec:<V_INT_EQUIV>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:<V_INT_EQUIV>
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand")]
+ SVE2_COND_INT_UNARY_FP)
+ (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+)
+
+(define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
+ (unspec:<V_INT_EQUIV>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:<V_INT_EQUIV>
+ [(match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
+ SVE2_COND_INT_UNARY_FP)
+ (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2
+ && !rtx_equal_p (operands[2], operands[3])
+ && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+ [(set_attr "movprfx" "*,yes,yes")]
+)
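+
+;; A usage sketch, assuming the ACLE spelling svlogb_f32_z (see
+;; aarch64-sve-builtins-sve2.def).  Note that the result is an integer
+;; vector, as modeled by <V_INT_EQUIV> above:
+;;
+;;   #include <arm_sve.h>
+;;   svint32_t
+;;   exponents (svbool_t pg, svfloat32_t x)
+;;   {
+;;     return svlogb_f32_z (pg, x);   /* FLOGB .s, after a MOVPRFX if needed */
+;;   }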
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Polynomial multiplication
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - PMUL
+;; - PMULLB
+;; - PMULLT
+;; -------------------------------------------------------------------------
+
+;; Uniform PMUL.
+(define_insn "@aarch64_sve2_pmul<mode>"
+ [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
+ (unspec:VNx16QI_ONLY
+ [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
+ (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
+ UNSPEC_PMUL))]
+ "TARGET_SVE2"
+ "pmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+)
+
+;; Extending PMUL, with the results modeled as wider vectors.
+;; This representation is only possible for .H and .D, not .Q.
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_FULL_HDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_HDI
+ [(match_operand:<VNARROW> 1 "register_operand" "w")
+ (match_operand:<VNARROW> 2 "register_operand" "w")]
+ SVE2_PMULL))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
+)
+
+;; Extending PMUL, with the results modeled as pairs of values.
+;; This representation works for .H, .D and .Q, with .Q requiring
+;; the AES extension. (This is enforced by the mode iterator.)
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE2_PMULL_PAIR_I 0 "register_operand" "=w")
+ (unspec:SVE2_PMULL_PAIR_I
+ [(match_operand:SVE2_PMULL_PAIR_I 1 "register_operand" "w")
+ (match_operand:SVE2_PMULL_PAIR_I 2 "register_operand" "w")]
+ SVE2_PMULL_PAIR))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
+)
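+
+;; A usage sketch, assuming the ACLE spellings svpmul_u8, svpmullb_u16
+;; and svpmullb_pair_u64 (see aarch64-sve-builtins-sve2.def); the 64-bit
+;; pair form additionally requires +sve2-aes:
+;;
+;;   #include <arm_sve.h>
+;;   svuint8_t mul_p8 (svuint8_t a, svuint8_t b)
+;;   { return svpmul_u8 (a, b); }                    /* PMUL */
+;;   svuint16_t mul_p8_wide (svuint8_t a, svuint8_t b)
+;;   { return svpmullb_u16 (a, b); }                 /* PMULLB .h */
+;;   svuint64_t mul_p64_pair (svuint64_t a, svuint64_t b)
+;;   { return svpmullb_pair_u64 (a, b); }            /* PMULLB .q */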
+
+;; =========================================================================
+;; == Permutation
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] General permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - TBL (vector pair form)
+;; - TBX
+;; -------------------------------------------------------------------------
+
+;; TBL on a pair of data vectors.
+(define_insn "@aarch64_sve2_tbl2<mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
+ (unspec:SVE_FULL
+ [(match_operand:<VDOUBLE> 1 "register_operand" "w")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
+ UNSPEC_TBL2))]
"TARGET_SVE2"
- "<r>shrnt\t%0.<Vetype>, %2.<Vewtype>, #%3"
+ "tbl\t%0.<Vetype>, %1, %2.<Vetype>"
+)
+
+;; TBX. These instructions do not take MOVPRFX.
+(define_insn "@aarch64_sve2_tbx<mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
+ (unspec:SVE_FULL
+ [(match_operand:SVE_FULL 1 "register_operand" "0")
+ (match_operand:SVE_FULL 2 "register_operand" "w")
+ (match_operand:<V_INT_EQUIV> 3 "register_operand" "w")]
+ UNSPEC_TBX))]
+ "TARGET_SVE2"
+ "tbx\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
+)
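+
+;; A usage sketch, assuming the ACLE spellings svtbl2_u8 and svtbx_u8
+;; (see aarch64-sve-builtins-sve2.def).  svtbl2 takes a two-vector
+;; tuple, matching the <VDOUBLE> operand above:
+;;
+;;   #include <arm_sve.h>
+;;   svuint8_t lookup2 (svuint8x2_t table, svuint8_t idx)
+;;   { return svtbl2_u8 (table, idx); }              /* TBL, two sources */
+;;   svuint8_t lookup_fb (svuint8_t fb, svuint8_t data, svuint8_t idx)
+;;   { return svtbx_u8 (fb, data, idx); }            /* TBX */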
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Optional bit-permute extensions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BDEP
+;; - BEXT
+;; - BGRP
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand" "w")
+ (match_operand:SVE_FULL_I 2 "register_operand" "w")]
+ SVE2_INT_BITPERM))]
+ "TARGET_SVE2_BITPERM"
+ "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
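+
+;; A usage sketch, assuming the ACLE spellings svbdep_u8, svbext_u8 and
+;; svbgrp_u8 (see aarch64-sve-builtins-sve2.def); requires +sve2-bitperm:
+;;
+;;   #include <arm_sve.h>
+;;   svuint8_t deposit (svuint8_t bits, svuint8_t mask)
+;;   { return svbdep_u8 (bits, mask); }              /* BDEP */
+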
;; =========================================================================
@@ -469,3 +2273,242 @@
emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- Histogram processing
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - HISTCNT
+;; - HISTSEG
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve2_histcnt<mode>"
+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
+ (unspec:SVE_FULL_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
+ (match_operand:SVE_FULL_SDI 3 "register_operand" "w")]
+ UNSPEC_HISTCNT))]
+ "TARGET_SVE2"
+ "histcnt\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+)
+
+(define_insn "@aarch64_sve2_histseg<mode>"
+ [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
+ (unspec:VNx16QI_ONLY
+ [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
+ (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
+ UNSPEC_HISTSEG))]
+ "TARGET_SVE2"
+ "histseg\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+)
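+
+;; A usage sketch, assuming the ACLE spellings svhistcnt_s32_z and
+;; svhistseg_u8 (see aarch64-sve-builtins-sve2.def):
+;;
+;;   #include <arm_sve.h>
+;;   svuint32_t counts (svbool_t pg, svint32_t a, svint32_t b)
+;;   { return svhistcnt_s32_z (pg, a, b); }          /* HISTCNT */
+;;   svuint8_t segments (svuint8_t a, svuint8_t b)
+;;   { return svhistseg_u8 (a, b); }                 /* HISTSEG */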
+
+;; -------------------------------------------------------------------------
+;; ---- String matching
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MATCH
+;; - NMATCH
+;; -------------------------------------------------------------------------
+
+;; Predicated string matching.
+(define_insn "@aarch64_pred_<sve_int_op><mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_BHI 3 "register_operand" "w")
+ (match_operand:SVE_FULL_BHI 4 "register_operand" "w")]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE2"
+ "<sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
+)
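+
+;; A usage sketch, assuming the ACLE spelling svmatch_u8 (see
+;; aarch64-sve-builtins-sve2.def).  The result is a predicate; uses of
+;; the flags result are picked up by the *_cc and *_ptest patterns below:
+;;
+;;   #include <arm_sve.h>
+;;   svbool_t any_match (svbool_t pg, svuint8_t haystack, svuint8_t needles)
+;;   { return svmatch_u8 (pg, haystack, needles); }  /* MATCH */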
+
+;; Predicated string matching in which both the flag and predicate results
+;; are interesting.
+(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand" "Upl")
+ (match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand 6)
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
+ (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z)]
+ UNSPEC_PTEST))
+ (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+ (unspec:<VPRED>
+ [(match_dup 6)
+ (match_dup 7)
+ (unspec:<VPRED>
+ [(match_dup 2)
+ (match_dup 3)]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z))]
+ "TARGET_SVE2
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+ "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
+)
+
+;; Predicated string matching in which only the flags result is interesting.
+(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_ptest"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand" "Upl")
+ (match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand 6)
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
+ (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z)]
+ UNSPEC_PTEST))
+ (clobber (match_scratch:<VPRED> 0 "=Upa"))]
+ "TARGET_SVE2
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+ "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
+)
+
+;; =========================================================================
+;; == Cryptographic extensions
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Optional AES extensions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - AESD
+;; - AESE
+;; - AESIMC
+;; - AESMC
+;; -------------------------------------------------------------------------
+
+;; AESD and AESE.
+(define_insn "aarch64_sve2_aes<aes_op>"
+ [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+ (unspec:VNx16QI
+ [(xor:VNx16QI
+ (match_operand:VNx16QI 1 "register_operand" "%0")
+ (match_operand:VNx16QI 2 "register_operand" "w"))]
+ CRYPTO_AES))]
+ "TARGET_SVE2_AES"
+ "aes<aes_op>\t%0.b, %0.b, %2.b"
+ [(set_attr "type" "crypto_aese")]
+)
+
+;; AESMC and AESIMC. These instructions do not take MOVPRFX.
+(define_insn "aarch64_sve2_aes<aesmc_op>"
+ [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+ (unspec:VNx16QI
+ [(match_operand:VNx16QI 1 "register_operand" "0")]
+ CRYPTO_AESMC))]
+ "TARGET_SVE2_AES"
+ "aes<aesmc_op>\t%0.b, %0.b"
+ [(set_attr "type" "crypto_aesmc")]
+)
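+
+;; A usage sketch, assuming the ACLE spellings svaese_u8 and svaesmc_u8
+;; (see aarch64-sve-builtins-sve2.def); requires +sve2-aes:
+;;
+;;   #include <arm_sve.h>
+;;   svuint8_t round_enc (svuint8_t state, svuint8_t key)
+;;   { return svaese_u8 (state, key); }              /* AESE */
+;;   svuint8_t mix_columns (svuint8_t state)
+;;   { return svaesmc_u8 (state); }                  /* AESMC */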
+
+;; When AESE/AESMC and AESD/AESIMC fusion is enabled, we really want
+;; to keep the two together and enforce the register dependency without
+;; scheduling or register allocation messing up the order or introducing
+;; moves in between. Mash the two together during combine.
+
+(define_insn "*aarch64_sve2_aese_fused"
+ [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+ (unspec:VNx16QI
+ [(unspec:VNx16QI
+ [(xor:VNx16QI
+ (match_operand:VNx16QI 1 "register_operand" "%0")
+ (match_operand:VNx16QI 2 "register_operand" "w"))]
+ UNSPEC_AESE)]
+ UNSPEC_AESMC))]
+ "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
+ "aese\t%0.b, %0.b, %2.b\;aesmc\t%0.b, %0.b"
+ [(set_attr "type" "crypto_aese")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*aarch64_sve2_aesd_fused"
+ [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+ (unspec:VNx16QI
+ [(unspec:VNx16QI
+ [(xor:VNx16QI
+ (match_operand:VNx16QI 1 "register_operand" "%0")
+ (match_operand:VNx16QI 2 "register_operand" "w"))]
+ UNSPEC_AESD)]
+ UNSPEC_AESIMC))]
+ "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
+ "aesd\t%0.b, %0.b, %2.b\;aesimc\t%0.b, %0.b"
+ [(set_attr "type" "crypto_aese")
+ (set_attr "length" "8")]
+)
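+
+;; A usage sketch of the fusion case above, assuming the svaese_u8 and
+;; svaesmc_u8 spellings: when AARCH64_FUSE_AES_AESMC is enabled for the
+;; target, the composed call below is expected to match the fused
+;; pattern and emit a back-to-back AESE ; AESMC pair:
+;;
+;;   #include <arm_sve.h>
+;;   svuint8_t full_round (svuint8_t state, svuint8_t key)
+;;   { return svaesmc_u8 (svaese_u8 (state, key)); }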
+
+;; -------------------------------------------------------------------------
+;; ---- Optional SHA-3 extensions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - RAX1
+;; -------------------------------------------------------------------------
+
+(define_insn "aarch64_sve2_rax1"
+ [(set (match_operand:VNx2DI 0 "register_operand" "=w")
+ (xor:VNx2DI
+ (rotate:VNx2DI
+ (match_operand:VNx2DI 2 "register_operand" "w")
+ (const_int 1))
+ (match_operand:VNx2DI 1 "register_operand" "w")))]
+ "TARGET_SVE2_SHA3"
+ "rax1\t%0.d, %1.d, %2.d"
+ [(set_attr "type" "crypto_sha3")]
+)
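+
+;; A usage sketch, assuming the ACLE spelling svrax1_u64 (see
+;; aarch64-sve-builtins-sve2.def); requires +sve2-sha3:
+;;
+;;   #include <arm_sve.h>
+;;   svuint64_t rotate_xor (svuint64_t a, svuint64_t b)
+;;   { return svrax1_u64 (a, b); }                   /* RAX1 */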
+
+;; -------------------------------------------------------------------------
+;; ---- Optional SM4 extensions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SM4E
+;; - SM4EKEY
+;; -------------------------------------------------------------------------
+
+;; These instructions do not take MOVPRFX.
+(define_insn "aarch64_sve2_sm4e"
+ [(set (match_operand:VNx4SI 0 "register_operand" "=w")
+ (unspec:VNx4SI
+ [(match_operand:VNx4SI 1 "register_operand" "0")
+ (match_operand:VNx4SI 2 "register_operand" "w")]
+ UNSPEC_SM4E))]
+ "TARGET_SVE2_SM4"
+ "sm4e\t%0.s, %0.s, %2.s"
+ [(set_attr "type" "crypto_sm4")]
+)
+
+(define_insn "aarch64_sve2_sm4ekey"
+ [(set (match_operand:VNx4SI 0 "register_operand" "=w")
+ (unspec:VNx4SI
+ [(match_operand:VNx4SI 1 "register_operand" "w")
+ (match_operand:VNx4SI 2 "register_operand" "w")]
+ UNSPEC_SM4EKEY))]
+ "TARGET_SVE2_SM4"
+ "sm4ekey\t%0.s, %1.s, %2.s"
+ [(set_attr "type" "crypto_sm4")]
+)
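+
+;; A usage sketch, assuming the ACLE spellings svsm4e_u32 and
+;; svsm4ekey_u32 (see aarch64-sve-builtins-sve2.def); requires +sve2-sm4:
+;;
+;;   #include <arm_sve.h>
+;;   svuint32_t encrypt (svuint32_t state, svuint32_t key)
+;;   { return svsm4e_u32 (state, key); }             /* SM4E */
+;;   svuint32_t schedule (svuint32_t a, svuint32_t b)
+;;   { return svsm4ekey_u32 (a, b); }                /* SM4EKEY */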
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 04dabd4..af5b00c 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -248,6 +248,10 @@ extern unsigned aarch64_architecture_version;
#define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16)
#define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE)
#define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2)
+#define AARCH64_ISA_SVE2_AES (aarch64_isa_flags & AARCH64_FL_SVE2_AES)
+#define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM)
+#define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
+#define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4)
#define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3)
#define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD)
#define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES)
@@ -302,6 +306,18 @@ extern unsigned aarch64_architecture_version;
/* SVE2 instructions, enabled through +sve2. */
#define TARGET_SVE2 (AARCH64_ISA_SVE2)
+/* SVE2 AES instructions, enabled through +sve2-aes. */
+#define TARGET_SVE2_AES (AARCH64_ISA_SVE2_AES)
+
+/* SVE2 BITPERM instructions, enabled through +sve2-bitperm. */
+#define TARGET_SVE2_BITPERM (AARCH64_ISA_SVE2_BITPERM)
+
+/* SVE2 SHA3 instructions, enabled through +sve2-sha3. */
+#define TARGET_SVE2_SHA3 (AARCH64_ISA_SVE2_SHA3)
+
+/* SVE2 SM4 instructions, enabled through +sve2-sm4. */
+#define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4)
+
/* ARMv8.3-A features. */
#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 509b9e5..a144e24 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -231,7 +231,9 @@
UNSPEC_LD1RQ
UNSPEC_LD1_GATHER
UNSPEC_LDFF1_GATHER
+ UNSPEC_LDNT1_GATHER
UNSPEC_ST1_SCATTER
+ UNSPEC_STNT1_SCATTER
UNSPEC_PRED_X
UNSPEC_PRED_Z
UNSPEC_PTEST
@@ -241,6 +243,10 @@
UNSPEC_UNPACKSLO
UNSPEC_UNPACKULO
UNSPEC_PACK
+ UNSPEC_WHILEGE
+ UNSPEC_WHILEGT
+ UNSPEC_WHILEHI
+ UNSPEC_WHILEHS
UNSPEC_WHILELE
UNSPEC_WHILELO
UNSPEC_WHILELS
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index c6b71a6..058c6bc 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -285,8 +285,10 @@
(define_mode_iterator VMUL_CHANGE_NLANES [V4HI V8HI V2SI V4SI V2SF V4SF])
;; Iterators for single modes, for "@" patterns.
+(define_mode_iterator VNx16QI_ONLY [VNx16QI])
(define_mode_iterator VNx8HI_ONLY [VNx8HI])
(define_mode_iterator VNx4SI_ONLY [VNx4SI])
+(define_mode_iterator VNx4SF_ONLY [VNx4SF])
(define_mode_iterator VNx2DI_ONLY [VNx2DI])
(define_mode_iterator VNx2DF_ONLY [VNx2DF])
@@ -298,6 +300,10 @@
VNx64QI VNx32HI VNx16SI VNx8DI
VNx32HF VNx16SF VNx8DF])
+;; SVE_STRUCT restricted to 2-vector tuples.
+(define_mode_iterator SVE_STRUCT2 [VNx32QI VNx16HI VNx8SI VNx4DI
+ VNx16HF VNx8SF VNx4DF])
+
;; All fully-packed SVE vector modes.
(define_mode_iterator SVE_FULL [VNx16QI VNx8HI VNx4SI VNx2DI
VNx8HF VNx4SF VNx2DF])
@@ -308,6 +314,9 @@
;; All fully-packed SVE floating-point vector modes.
(define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF])
+;; Fully-packed SVE integer vector modes that have 8-bit or 16-bit elements.
+(define_mode_iterator SVE_FULL_BHI [VNx16QI VNx8HI])
+
;; Fully-packed SVE integer vector modes that have 8-bit, 16-bit or 32-bit
;; elements.
(define_mode_iterator SVE_FULL_BHSI [VNx16QI VNx8HI VNx4SI])
@@ -319,10 +328,17 @@
;; elements.
(define_mode_iterator SVE_FULL_HSDI [VNx8HI VNx4SI VNx2DI])
+;; Fully-packed SVE integer vector modes that have 16-bit or 32-bit
+;; elements.
+(define_mode_iterator SVE_FULL_HSI [VNx8HI VNx4SI])
+
;; Fully-packed SVE floating-point vector modes that have 16-bit or 32-bit
;; elements.
(define_mode_iterator SVE_FULL_HSF [VNx8HF VNx4SF])
+;; Fully-packed SVE integer vector modes that have 16-bit or 64-bit elements.
+(define_mode_iterator SVE_FULL_HDI [VNx8HI VNx2DI])
+
;; Fully-packed SVE vector modes that have 32-bit or 64-bit elements.
(define_mode_iterator SVE_FULL_SD [VNx4SI VNx2DI VNx4SF VNx2DF])
@@ -386,6 +402,10 @@
;; SVE integer modes with 4 elements, excluding the narrowest element.
(define_mode_iterator SVE_4HSI [VNx4HI VNx4SI])
+;; SVE integer modes that can form the input to an SVE2 PMULL[BT] instruction.
+(define_mode_iterator SVE2_PMULL_PAIR_I [VNx16QI VNx4SI
+ (VNx2DI "TARGET_SVE2_AES")])
+
;; Modes involved in extending or truncating SVE data, for 8 elements per
;; 128-bit block.
(define_mode_iterator VNx8_NARROW [VNx8QI])
@@ -446,10 +466,6 @@
UNSPEC_RSUBHN2 ; Used in aarch64-simd.md.
UNSPEC_SQDMULH ; Used in aarch64-simd.md.
UNSPEC_SQRDMULH ; Used in aarch64-simd.md.
- UNSPEC_SMULLB ; Used in aarch64-sve2.md.
- UNSPEC_SMULLT ; Used in aarch64-sve2.md.
- UNSPEC_UMULLB ; Used in aarch64-sve2.md.
- UNSPEC_UMULLT ; Used in aarch64-sve2.md.
UNSPEC_PMUL ; Used in aarch64-simd.md.
UNSPEC_FMULX ; Used in aarch64-simd.md.
UNSPEC_USQADD ; Used in aarch64-simd.md.
@@ -472,10 +488,6 @@
UNSPEC_UQSHRN ; Used in aarch64-simd.md.
UNSPEC_SQRSHRN ; Used in aarch64-simd.md.
UNSPEC_UQRSHRN ; Used in aarch64-simd.md.
- UNSPEC_SHRNB ; Used in aarch64-sve2.md.
- UNSPEC_SHRNT ; Used in aarch64-sve2.md.
- UNSPEC_RSHRNB ; Used in aarch64-sve2.md.
- UNSPEC_RSHRNT ; Used in aarch64-sve2.md.
UNSPEC_SSHL ; Used in aarch64-simd.md.
UNSPEC_USHL ; Used in aarch64-simd.md.
UNSPEC_SRSHL ; Used in aarch64-simd.md.
@@ -643,11 +655,129 @@
UNSPEC_FCMLA90 ; Used in aarch64-simd.md.
UNSPEC_FCMLA180 ; Used in aarch64-simd.md.
UNSPEC_FCMLA270 ; Used in aarch64-simd.md.
- UNSPEC_SMULHS ; Used in aarch64-sve2.md.
+ UNSPEC_ASRD ; Used in aarch64-sve.md.
+ UNSPEC_ADCLB ; Used in aarch64-sve2.md.
+ UNSPEC_ADCLT ; Used in aarch64-sve2.md.
+ UNSPEC_ADDHNB ; Used in aarch64-sve2.md.
+ UNSPEC_ADDHNT ; Used in aarch64-sve2.md.
+ UNSPEC_BDEP ; Used in aarch64-sve2.md.
+ UNSPEC_BEXT ; Used in aarch64-sve2.md.
+ UNSPEC_BGRP ; Used in aarch64-sve2.md.
+ UNSPEC_CADD270 ; Used in aarch64-sve2.md.
+ UNSPEC_CADD90 ; Used in aarch64-sve2.md.
+ UNSPEC_CDOT ; Used in aarch64-sve2.md.
+ UNSPEC_CDOT180 ; Used in aarch64-sve2.md.
+ UNSPEC_CDOT270 ; Used in aarch64-sve2.md.
+ UNSPEC_CDOT90 ; Used in aarch64-sve2.md.
+ UNSPEC_CMLA ; Used in aarch64-sve2.md.
+ UNSPEC_CMLA180 ; Used in aarch64-sve2.md.
+ UNSPEC_CMLA270 ; Used in aarch64-sve2.md.
+ UNSPEC_CMLA90 ; Used in aarch64-sve2.md.
+ UNSPEC_COND_FCVTLT ; Used in aarch64-sve2.md.
+ UNSPEC_COND_FCVTNT ; Used in aarch64-sve2.md.
+ UNSPEC_COND_FCVTX ; Used in aarch64-sve2.md.
+ UNSPEC_COND_FCVTXNT ; Used in aarch64-sve2.md.
+ UNSPEC_COND_FLOGB ; Used in aarch64-sve2.md.
+ UNSPEC_EORBT ; Used in aarch64-sve2.md.
+ UNSPEC_EORTB ; Used in aarch64-sve2.md.
+ UNSPEC_FADDP ; Used in aarch64-sve2.md.
+ UNSPEC_FMAXNMP ; Used in aarch64-sve2.md.
+ UNSPEC_FMAXP ; Used in aarch64-sve2.md.
+ UNSPEC_FMINNMP ; Used in aarch64-sve2.md.
+ UNSPEC_FMINP ; Used in aarch64-sve2.md.
+ UNSPEC_FMLALB ; Used in aarch64-sve2.md.
+ UNSPEC_FMLALT ; Used in aarch64-sve2.md.
+ UNSPEC_FMLSLB ; Used in aarch64-sve2.md.
+ UNSPEC_FMLSLT ; Used in aarch64-sve2.md.
+ UNSPEC_HISTCNT ; Used in aarch64-sve2.md.
+ UNSPEC_HISTSEG ; Used in aarch64-sve2.md.
+ UNSPEC_MATCH ; Used in aarch64-sve2.md.
+ UNSPEC_NMATCH ; Used in aarch64-sve2.md.
+ UNSPEC_PMULLB ; Used in aarch64-sve2.md.
+ UNSPEC_PMULLB_PAIR ; Used in aarch64-sve2.md.
+ UNSPEC_PMULLT ; Used in aarch64-sve2.md.
+ UNSPEC_PMULLT_PAIR ; Used in aarch64-sve2.md.
+ UNSPEC_RADDHNB ; Used in aarch64-sve2.md.
+ UNSPEC_RADDHNT ; Used in aarch64-sve2.md.
+ UNSPEC_RSHRNB ; Used in aarch64-sve2.md.
+ UNSPEC_RSHRNT ; Used in aarch64-sve2.md.
+ UNSPEC_RSUBHNB ; Used in aarch64-sve2.md.
+ UNSPEC_RSUBHNT ; Used in aarch64-sve2.md.
+ UNSPEC_SABDLB ; Used in aarch64-sve2.md.
+ UNSPEC_SABDLT ; Used in aarch64-sve2.md.
+ UNSPEC_SADDLB ; Used in aarch64-sve2.md.
+ UNSPEC_SADDLBT ; Used in aarch64-sve2.md.
+ UNSPEC_SADDLT ; Used in aarch64-sve2.md.
+ UNSPEC_SADDWB ; Used in aarch64-sve2.md.
+ UNSPEC_SADDWT ; Used in aarch64-sve2.md.
+ UNSPEC_SBCLB ; Used in aarch64-sve2.md.
+ UNSPEC_SBCLT ; Used in aarch64-sve2.md.
+ UNSPEC_SHRNB ; Used in aarch64-sve2.md.
+ UNSPEC_SHRNT ; Used in aarch64-sve2.md.
+ UNSPEC_SLI ; Used in aarch64-sve2.md.
+ UNSPEC_SMAXP ; Used in aarch64-sve2.md.
+ UNSPEC_SMINP ; Used in aarch64-sve2.md.
UNSPEC_SMULHRS ; Used in aarch64-sve2.md.
- UNSPEC_UMULHS ; Used in aarch64-sve2.md.
+ UNSPEC_SMULHS ; Used in aarch64-sve2.md.
+ UNSPEC_SMULLB ; Used in aarch64-sve2.md.
+ UNSPEC_SMULLT ; Used in aarch64-sve2.md.
+ UNSPEC_SQCADD270 ; Used in aarch64-sve2.md.
+ UNSPEC_SQCADD90 ; Used in aarch64-sve2.md.
+ UNSPEC_SQDMULLB ; Used in aarch64-sve2.md.
+ UNSPEC_SQDMULLBT ; Used in aarch64-sve2.md.
+ UNSPEC_SQDMULLT ; Used in aarch64-sve2.md.
+ UNSPEC_SQRDCMLAH ; Used in aarch64-sve2.md.
+ UNSPEC_SQRDCMLAH180 ; Used in aarch64-sve2.md.
+ UNSPEC_SQRDCMLAH270 ; Used in aarch64-sve2.md.
+ UNSPEC_SQRDCMLAH90 ; Used in aarch64-sve2.md.
+ UNSPEC_SQRSHRNB ; Used in aarch64-sve2.md.
+ UNSPEC_SQRSHRNT ; Used in aarch64-sve2.md.
+ UNSPEC_SQRSHRUNB ; Used in aarch64-sve2.md.
+ UNSPEC_SQRSHRUNT ; Used in aarch64-sve2.md.
+ UNSPEC_SQSHRNB ; Used in aarch64-sve2.md.
+ UNSPEC_SQSHRNT ; Used in aarch64-sve2.md.
+ UNSPEC_SQSHRUNB ; Used in aarch64-sve2.md.
+ UNSPEC_SQSHRUNT ; Used in aarch64-sve2.md.
+ UNSPEC_SQXTNB ; Used in aarch64-sve2.md.
+ UNSPEC_SQXTNT ; Used in aarch64-sve2.md.
+ UNSPEC_SQXTUNB ; Used in aarch64-sve2.md.
+ UNSPEC_SQXTUNT ; Used in aarch64-sve2.md.
+ UNSPEC_SRI ; Used in aarch64-sve2.md.
+ UNSPEC_SSHLLB ; Used in aarch64-sve2.md.
+ UNSPEC_SSHLLT ; Used in aarch64-sve2.md.
+ UNSPEC_SSUBLB ; Used in aarch64-sve2.md.
+ UNSPEC_SSUBLBT ; Used in aarch64-sve2.md.
+ UNSPEC_SSUBLT ; Used in aarch64-sve2.md.
+ UNSPEC_SSUBLTB ; Used in aarch64-sve2.md.
+ UNSPEC_SSUBWB ; Used in aarch64-sve2.md.
+ UNSPEC_SSUBWT ; Used in aarch64-sve2.md.
+ UNSPEC_SUBHNB ; Used in aarch64-sve2.md.
+ UNSPEC_SUBHNT ; Used in aarch64-sve2.md.
+ UNSPEC_TBL2 ; Used in aarch64-sve2.md.
+ UNSPEC_UABDLB ; Used in aarch64-sve2.md.
+ UNSPEC_UABDLT ; Used in aarch64-sve2.md.
+ UNSPEC_UADDLB ; Used in aarch64-sve2.md.
+ UNSPEC_UADDLT ; Used in aarch64-sve2.md.
+ UNSPEC_UADDWB ; Used in aarch64-sve2.md.
+ UNSPEC_UADDWT ; Used in aarch64-sve2.md.
+ UNSPEC_UMAXP ; Used in aarch64-sve2.md.
+ UNSPEC_UMINP ; Used in aarch64-sve2.md.
UNSPEC_UMULHRS ; Used in aarch64-sve2.md.
- UNSPEC_ASRD ; Used in aarch64-sve.md.
+ UNSPEC_UMULHS ; Used in aarch64-sve2.md.
+ UNSPEC_UMULLB ; Used in aarch64-sve2.md.
+ UNSPEC_UMULLT ; Used in aarch64-sve2.md.
+ UNSPEC_UQRSHRNB ; Used in aarch64-sve2.md.
+ UNSPEC_UQRSHRNT ; Used in aarch64-sve2.md.
+ UNSPEC_UQSHRNB ; Used in aarch64-sve2.md.
+ UNSPEC_UQSHRNT ; Used in aarch64-sve2.md.
+ UNSPEC_UQXTNB ; Used in aarch64-sve2.md.
+ UNSPEC_UQXTNT ; Used in aarch64-sve2.md.
+ UNSPEC_USHLLB ; Used in aarch64-sve2.md.
+ UNSPEC_USHLLT ; Used in aarch64-sve2.md.
+ UNSPEC_USUBLB ; Used in aarch64-sve2.md.
+ UNSPEC_USUBLT ; Used in aarch64-sve2.md.
+ UNSPEC_USUBWB ; Used in aarch64-sve2.md.
+ UNSPEC_USUBWT ; Used in aarch64-sve2.md.
])
;; ------------------------------------------------------------------
@@ -1011,6 +1141,11 @@
(define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI")
(V2DI "V4SI")])
+;; Narrowed modes of vector modes.
+(define_mode_attr VNARROW [(VNx8HI "VNx16QI")
+ (VNx4SI "VNx8HI") (VNx4SF "VNx8HF")
+ (VNx2DI "VNx4SI") (VNx2DF "VNx4SF")])
+
;; Register suffix narrowed modes for VQN.
(define_mode_attr Vntype [(V8HI "8b") (V4SI "4h")
(V2DI "2s")])
@@ -1049,10 +1184,16 @@
(V8HI "4s") (V4SI "2d")
(V8HF "4s") (V4SF "2d")])
-;; SVE vector after widening
+;; SVE vector after narrowing.
+(define_mode_attr Ventype [(VNx8HI "b")
+ (VNx4SI "h") (VNx4SF "h")
+ (VNx2DI "s") (VNx2DF "s")])
+
+;; SVE vector after widening.
(define_mode_attr Vewtype [(VNx16QI "h")
(VNx8HI "s") (VNx8HF "s")
- (VNx4SI "d") (VNx4SF "d")])
+ (VNx4SI "d") (VNx4SF "d")
+ (VNx2DI "q")])
;; Widened mode register suffixes for VDW/VQW.
(define_mode_attr Vmwtype [(V8QI ".8h") (V4HI ".4s")
@@ -1401,6 +1542,11 @@
(VNx16SI "vnx4bi") (VNx16SF "vnx4bi")
(VNx8DI "vnx2bi") (VNx8DF "vnx2bi")])
+(define_mode_attr VDOUBLE [(VNx16QI "VNx32QI")
+ (VNx8HI "VNx16HI") (VNx8HF "VNx16HF")
+ (VNx4SI "VNx8SI") (VNx4SF "VNx8SF")
+ (VNx2DI "VNx4DI") (VNx2DF "VNx4DF")])
+
;; On AArch64 the By element instruction doesn't have a 2S variant.
 ;; However, because the instruction always selects a pair of values,
 ;; the normal 3SAME instruction can be used here instead.
@@ -1427,7 +1573,7 @@
(VNx2DI "0x27")])
;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index.
-(define_mode_attr sve_lane_con [(VNx4SI "y") (VNx2DI "x")
+(define_mode_attr sve_lane_con [(VNx8HI "y") (VNx4SI "y") (VNx2DI "x")
(VNx8HF "y") (VNx4SF "y") (VNx2DF "x")])
;; The constraint to use for an SVE FCMLA lane index.
@@ -1529,12 +1675,18 @@
(define_code_iterator FAC_COMPARISONS [lt le ge gt])
;; SVE integer unary operations.
-(define_code_iterator SVE_INT_UNARY [abs neg not clrsb clz popcount])
+(define_code_iterator SVE_INT_UNARY [abs neg not clrsb clz popcount
+ (ss_abs "TARGET_SVE2")
+ (ss_neg "TARGET_SVE2")])
;; SVE integer binary operations.
(define_code_iterator SVE_INT_BINARY [plus minus mult smax umax smin umin
ashift ashiftrt lshiftrt
- and ior xor])
+ and ior xor
+ (ss_plus "TARGET_SVE2")
+ (us_plus "TARGET_SVE2")
+ (ss_minus "TARGET_SVE2")
+ (us_minus "TARGET_SVE2")])
;; SVE integer binary division operations.
(define_code_iterator SVE_INT_BINARY_SD [div udiv])
@@ -1752,7 +1904,13 @@
(not "not")
(clrsb "cls")
(clz "clz")
- (popcount "cnt")])
+ (popcount "cnt")
+ (ss_plus "sqadd")
+ (us_plus "uqadd")
+ (ss_minus "sqsub")
+ (us_minus "uqsub")
+ (ss_neg "sqneg")
+ (ss_abs "sqabs")])
(define_code_attr sve_int_op_rev [(plus "add")
(minus "subr")
@@ -1768,7 +1926,11 @@
(lshiftrt "lsrr")
(and "and")
(ior "orr")
- (xor "eor")])
+ (xor "eor")
+ (ss_plus "sqadd")
+ (us_plus "uqadd")
+ (ss_minus "sqsubr")
+ (us_minus "uqsubr")])
;; The floating-point SVE instruction that implements an rtx code.
(define_code_attr sve_fp_op [(plus "fadd")
@@ -1814,7 +1976,11 @@
(lshiftrt "aarch64_sve_rshift_operand")
(and "aarch64_sve_pred_and_operand")
(ior "register_operand")
- (xor "register_operand")])
+ (xor "register_operand")
+ (ss_plus "register_operand")
+ (us_plus "register_operand")
+ (ss_minus "register_operand")
+ (us_minus "register_operand")])
(define_code_attr inc_dec [(minus "dec") (ss_minus "sqdec") (us_minus "uqdec")
(plus "inc") (ss_plus "sqinc") (us_plus "uqinc")])
@@ -1850,13 +2016,6 @@
(define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD])
-(define_int_iterator MULLBT [UNSPEC_SMULLB UNSPEC_UMULLB
- UNSPEC_SMULLT UNSPEC_UMULLT])
-
-(define_int_iterator SHRNB [UNSPEC_SHRNB UNSPEC_RSHRNB])
-
-(define_int_iterator SHRNT [UNSPEC_SHRNT UNSPEC_RSHRNT])
-
(define_int_iterator BSL_DUP [1 2])
(define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
@@ -1971,6 +2130,11 @@
(define_int_iterator SVE_FP_UNARY_INT [UNSPEC_FEXPA])
+(define_int_iterator SVE_INT_SHIFT_IMM [UNSPEC_ASRD
+ (UNSPEC_SQSHLU "TARGET_SVE2")
+ (UNSPEC_SRSHR "TARGET_SVE2")
+ (UNSPEC_URSHR "TARGET_SVE2")])
+
(define_int_iterator SVE_FP_BINARY [UNSPEC_FRECPS UNSPEC_RSQRTS])
(define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL])
@@ -2084,6 +2248,10 @@
(define_int_iterator SVE_WHILE [UNSPEC_WHILELE UNSPEC_WHILELO
UNSPEC_WHILELS UNSPEC_WHILELT
+ (UNSPEC_WHILEGE "TARGET_SVE2")
+ (UNSPEC_WHILEGT "TARGET_SVE2")
+ (UNSPEC_WHILEHI "TARGET_SVE2")
+ (UNSPEC_WHILEHS "TARGET_SVE2")
(UNSPEC_WHILERW "TARGET_SVE2")
(UNSPEC_WHILEWR "TARGET_SVE2")])
@@ -2095,6 +2263,232 @@
(define_int_iterator SVE_LDFF1_LDNF1 [UNSPEC_LDFF1 UNSPEC_LDNF1])
+(define_int_iterator SVE2_U32_UNARY [UNSPEC_URECPE UNSPEC_RSQRTE])
+
+(define_int_iterator SVE2_INT_UNARY_NARROWB [UNSPEC_SQXTNB
+ UNSPEC_SQXTUNB
+ UNSPEC_UQXTNB])
+
+(define_int_iterator SVE2_INT_UNARY_NARROWT [UNSPEC_SQXTNT
+ UNSPEC_SQXTUNT
+ UNSPEC_UQXTNT])
+
+(define_int_iterator SVE2_INT_BINARY [UNSPEC_SQDMULH
+ UNSPEC_SQRDMULH])
+
+(define_int_iterator SVE2_INT_BINARY_LANE [UNSPEC_SQDMULH
+ UNSPEC_SQRDMULH])
+
+(define_int_iterator SVE2_INT_BINARY_LONG [UNSPEC_SABDLB
+ UNSPEC_SABDLT
+ UNSPEC_SADDLB
+ UNSPEC_SADDLBT
+ UNSPEC_SADDLT
+ UNSPEC_SMULLB
+ UNSPEC_SMULLT
+ UNSPEC_SQDMULLB
+ UNSPEC_SQDMULLT
+ UNSPEC_SSUBLB
+ UNSPEC_SSUBLBT
+ UNSPEC_SSUBLT
+ UNSPEC_SSUBLTB
+ UNSPEC_UABDLB
+ UNSPEC_UABDLT
+ UNSPEC_UADDLB
+ UNSPEC_UADDLT
+ UNSPEC_UMULLB
+ UNSPEC_UMULLT
+ UNSPEC_USUBLB
+ UNSPEC_USUBLT])
+
+(define_int_iterator SVE2_INT_BINARY_LONG_LANE [UNSPEC_SMULLB
+ UNSPEC_SMULLT
+ UNSPEC_SQDMULLB
+ UNSPEC_SQDMULLT
+ UNSPEC_UMULLB
+ UNSPEC_UMULLT])
+
+(define_int_iterator SVE2_INT_BINARY_NARROWB [UNSPEC_ADDHNB
+ UNSPEC_RADDHNB
+ UNSPEC_RSUBHNB
+ UNSPEC_SUBHNB])
+
+(define_int_iterator SVE2_INT_BINARY_NARROWT [UNSPEC_ADDHNT
+ UNSPEC_RADDHNT
+ UNSPEC_RSUBHNT
+ UNSPEC_SUBHNT])
+
+(define_int_iterator SVE2_INT_BINARY_PAIR [UNSPEC_ADDP
+ UNSPEC_SMAXP
+ UNSPEC_SMINP
+ UNSPEC_UMAXP
+ UNSPEC_UMINP])
+
+(define_int_iterator SVE2_FP_BINARY_PAIR [UNSPEC_FADDP
+ UNSPEC_FMAXP
+ UNSPEC_FMAXNMP
+ UNSPEC_FMINP
+ UNSPEC_FMINNMP])
+
+(define_int_iterator SVE2_INT_BINARY_PAIR_LONG [UNSPEC_SADALP UNSPEC_UADALP])
+
+(define_int_iterator SVE2_INT_BINARY_WIDE [UNSPEC_SADDWB
+ UNSPEC_SADDWT
+ UNSPEC_SSUBWB
+ UNSPEC_SSUBWT
+ UNSPEC_UADDWB
+ UNSPEC_UADDWT
+ UNSPEC_USUBWB
+ UNSPEC_USUBWT])
+
+(define_int_iterator SVE2_INT_SHIFT_IMM_LONG [UNSPEC_SSHLLB
+ UNSPEC_SSHLLT
+ UNSPEC_USHLLB
+ UNSPEC_USHLLT])
+
+(define_int_iterator SVE2_INT_SHIFT_IMM_NARROWB [UNSPEC_RSHRNB
+ UNSPEC_SHRNB
+ UNSPEC_SQRSHRNB
+ UNSPEC_SQRSHRUNB
+ UNSPEC_SQSHRNB
+ UNSPEC_SQSHRUNB
+ UNSPEC_UQRSHRNB
+ UNSPEC_UQSHRNB])
+
+(define_int_iterator SVE2_INT_SHIFT_IMM_NARROWT [UNSPEC_RSHRNT
+ UNSPEC_SHRNT
+ UNSPEC_SQRSHRNT
+ UNSPEC_SQRSHRUNT
+ UNSPEC_SQSHRNT
+ UNSPEC_SQSHRUNT
+ UNSPEC_UQRSHRNT
+ UNSPEC_UQSHRNT])
+
+(define_int_iterator SVE2_INT_SHIFT_INSERT [UNSPEC_SLI UNSPEC_SRI])
+
+(define_int_iterator SVE2_INT_CADD [UNSPEC_CADD90
+ UNSPEC_CADD270
+ UNSPEC_SQCADD90
+ UNSPEC_SQCADD270])
+
+(define_int_iterator SVE2_INT_BITPERM [UNSPEC_BDEP UNSPEC_BEXT UNSPEC_BGRP])
+
+(define_int_iterator SVE2_INT_TERNARY [UNSPEC_ADCLB
+ UNSPEC_ADCLT
+ UNSPEC_EORBT
+ UNSPEC_EORTB
+ UNSPEC_SBCLB
+ UNSPEC_SBCLT
+ UNSPEC_SQRDMLAH
+ UNSPEC_SQRDMLSH])
+
+(define_int_iterator SVE2_INT_TERNARY_LANE [UNSPEC_SQRDMLAH
+ UNSPEC_SQRDMLSH])
+
+(define_int_iterator SVE2_FP_TERNARY_LONG [UNSPEC_FMLALB
+ UNSPEC_FMLALT
+ UNSPEC_FMLSLB
+ UNSPEC_FMLSLT])
+
+(define_int_iterator SVE2_FP_TERNARY_LONG_LANE [UNSPEC_FMLALB
+ UNSPEC_FMLALT
+ UNSPEC_FMLSLB
+ UNSPEC_FMLSLT])
+
+(define_int_iterator SVE2_INT_CMLA [UNSPEC_CMLA
+ UNSPEC_CMLA90
+ UNSPEC_CMLA180
+ UNSPEC_CMLA270
+ UNSPEC_SQRDCMLAH
+ UNSPEC_SQRDCMLAH90
+ UNSPEC_SQRDCMLAH180
+ UNSPEC_SQRDCMLAH270])
+
+(define_int_iterator SVE2_INT_CDOT [UNSPEC_CDOT
+ UNSPEC_CDOT90
+ UNSPEC_CDOT180
+ UNSPEC_CDOT270])
+
+(define_int_iterator SVE2_INT_ADD_BINARY_LONG [UNSPEC_SABDLB
+ UNSPEC_SABDLT
+ UNSPEC_SMULLB
+ UNSPEC_SMULLT
+ UNSPEC_UABDLB
+ UNSPEC_UABDLT
+ UNSPEC_UMULLB
+ UNSPEC_UMULLT])
+
+(define_int_iterator SVE2_INT_QADD_BINARY_LONG [UNSPEC_SQDMULLB
+ UNSPEC_SQDMULLBT
+ UNSPEC_SQDMULLT])
+
+(define_int_iterator SVE2_INT_SUB_BINARY_LONG [UNSPEC_SMULLB
+ UNSPEC_SMULLT
+ UNSPEC_UMULLB
+ UNSPEC_UMULLT])
+
+(define_int_iterator SVE2_INT_QSUB_BINARY_LONG [UNSPEC_SQDMULLB
+ UNSPEC_SQDMULLBT
+ UNSPEC_SQDMULLT])
+
+(define_int_iterator SVE2_INT_ADD_BINARY_LONG_LANE [UNSPEC_SMULLB
+ UNSPEC_SMULLT
+ UNSPEC_UMULLB
+ UNSPEC_UMULLT])
+
+(define_int_iterator SVE2_INT_QADD_BINARY_LONG_LANE [UNSPEC_SQDMULLB
+ UNSPEC_SQDMULLT])
+
+(define_int_iterator SVE2_INT_SUB_BINARY_LONG_LANE [UNSPEC_SMULLB
+ UNSPEC_SMULLT
+ UNSPEC_UMULLB
+ UNSPEC_UMULLT])
+
+(define_int_iterator SVE2_INT_QSUB_BINARY_LONG_LANE [UNSPEC_SQDMULLB
+ UNSPEC_SQDMULLT])
+
+(define_int_iterator SVE2_COND_INT_UNARY_FP [UNSPEC_COND_FLOGB])
+
+(define_int_iterator SVE2_COND_FP_UNARY_LONG [UNSPEC_COND_FCVTLT])
+
+(define_int_iterator SVE2_COND_FP_UNARY_NARROWB [UNSPEC_COND_FCVTX])
+
+(define_int_iterator SVE2_COND_INT_BINARY [UNSPEC_SHADD
+ UNSPEC_SHSUB
+ UNSPEC_SQRSHL
+ UNSPEC_SRHADD
+ UNSPEC_SRSHL
+ UNSPEC_SUQADD
+ UNSPEC_UHADD
+ UNSPEC_UHSUB
+ UNSPEC_UQRSHL
+ UNSPEC_URHADD
+ UNSPEC_URSHL
+ UNSPEC_USQADD])
+
+(define_int_iterator SVE2_COND_INT_BINARY_NOREV [UNSPEC_SUQADD
+ UNSPEC_USQADD])
+
+(define_int_iterator SVE2_COND_INT_BINARY_REV [UNSPEC_SHADD
+ UNSPEC_SHSUB
+ UNSPEC_SQRSHL
+ UNSPEC_SRHADD
+ UNSPEC_SRSHL
+ UNSPEC_UHADD
+ UNSPEC_UHSUB
+ UNSPEC_UQRSHL
+ UNSPEC_URHADD
+ UNSPEC_URSHL])
+
+(define_int_iterator SVE2_COND_INT_SHIFT [UNSPEC_SQSHL
+ UNSPEC_UQSHL])
+
+(define_int_iterator SVE2_MATCH [UNSPEC_MATCH UNSPEC_NMATCH])
+
+(define_int_iterator SVE2_PMULL [UNSPEC_PMULLB UNSPEC_PMULLT])
+
+(define_int_iterator SVE2_PMULL_PAIR [UNSPEC_PMULLB_PAIR UNSPEC_PMULLT_PAIR])
+
(define_int_iterator FCADD [UNSPEC_FCADD90
UNSPEC_FCADD270])
@@ -2153,6 +2547,16 @@
(UNSPEC_UMINV "umin")
(UNSPEC_SMAXV "smax")
(UNSPEC_SMINV "smin")
+ (UNSPEC_CADD90 "cadd90")
+ (UNSPEC_CADD270 "cadd270")
+ (UNSPEC_CDOT "cdot")
+ (UNSPEC_CDOT90 "cdot90")
+ (UNSPEC_CDOT180 "cdot180")
+ (UNSPEC_CDOT270 "cdot270")
+ (UNSPEC_CMLA "cmla")
+ (UNSPEC_CMLA90 "cmla90")
+ (UNSPEC_CMLA180 "cmla180")
+ (UNSPEC_CMLA270 "cmla270")
(UNSPEC_FADDV "plus")
(UNSPEC_FMAXNMV "smax")
(UNSPEC_FMAXV "smax_nan")
@@ -2169,6 +2573,16 @@
(UNSPEC_FEXPA "fexpa")
(UNSPEC_FTSMUL "ftsmul")
(UNSPEC_FTSSEL "ftssel")
+ (UNSPEC_PMULLB "pmullb")
+ (UNSPEC_PMULLB_PAIR "pmullb_pair")
+ (UNSPEC_PMULLT "pmullt")
+ (UNSPEC_PMULLT_PAIR "pmullt_pair")
+ (UNSPEC_SQCADD90 "sqcadd90")
+ (UNSPEC_SQCADD270 "sqcadd270")
+ (UNSPEC_SQRDCMLAH "sqrdcmlah")
+ (UNSPEC_SQRDCMLAH90 "sqrdcmlah90")
+ (UNSPEC_SQRDCMLAH180 "sqrdcmlah180")
+ (UNSPEC_SQRDCMLAH270 "sqrdcmlah270")
(UNSPEC_WHILERW "vec_check_raw_alias")
(UNSPEC_WHILEWR "vec_check_war_alias")
(UNSPEC_COND_FABS "abs")
@@ -2269,8 +2683,6 @@
(UNSPEC_COND_FCVTZU "u")
(UNSPEC_COND_SCVTF "s")
(UNSPEC_COND_UCVTF "u")
- (UNSPEC_SMULLB "s") (UNSPEC_UMULLB "u")
- (UNSPEC_SMULLT "s") (UNSPEC_UMULLT "u")
(UNSPEC_SMULHS "s") (UNSPEC_UMULHS "u")
(UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")])
@@ -2309,14 +2721,17 @@
(UNSPEC_SQRSHRN "r") (UNSPEC_UQRSHRN "r")
(UNSPEC_SQSHL "") (UNSPEC_UQSHL "")
(UNSPEC_SQRSHL "r")(UNSPEC_UQRSHL "r")
- (UNSPEC_SHRNB "") (UNSPEC_SHRNT "")
- (UNSPEC_RSHRNB "r") (UNSPEC_RSHRNT "r")
(UNSPEC_SMULHS "") (UNSPEC_UMULHS "")
(UNSPEC_SMULHRS "r") (UNSPEC_UMULHRS "r")
])
(define_int_attr lr [(UNSPEC_SSLI "l") (UNSPEC_USLI "l")
- (UNSPEC_SSRI "r") (UNSPEC_USRI "r")])
+ (UNSPEC_SSRI "r") (UNSPEC_USRI "r")
+ (UNSPEC_SQSHL "l") (UNSPEC_UQSHL "l")
+ (UNSPEC_SQSHLU "l")
+ (UNSPEC_SRSHR "r") (UNSPEC_URSHR "r")
+ (UNSPEC_ASRD "r")
+ (UNSPEC_SLI "l") (UNSPEC_SRI "r")])
(define_int_attr u [(UNSPEC_SQSHLU "u") (UNSPEC_SQSHL "") (UNSPEC_UQSHL "")
(UNSPEC_SQSHRUN "u") (UNSPEC_SQRSHRUN "u")
@@ -2325,9 +2740,6 @@
(UNSPEC_SHADD "") (UNSPEC_UHADD "u")
(UNSPEC_SRHADD "") (UNSPEC_URHADD "u")])
-(define_int_attr bt [(UNSPEC_SMULLB "b") (UNSPEC_UMULLB "b")
- (UNSPEC_SMULLT "t") (UNSPEC_UMULLT "t")])
-
(define_int_attr fn [(UNSPEC_LDFF1 "f") (UNSPEC_LDNF1 "n")])
(define_int_attr ab [(UNSPEC_CLASTA "a") (UNSPEC_CLASTB "b")
@@ -2488,6 +2900,10 @@
(UNSPEC_COND_FCMLE "le")
(UNSPEC_COND_FCMLT "lt")
(UNSPEC_COND_FCMNE "ne")
+ (UNSPEC_WHILEGE "ge")
+ (UNSPEC_WHILEGT "gt")
+ (UNSPEC_WHILEHI "hi")
+ (UNSPEC_WHILEHS "hs")
(UNSPEC_WHILELE "le")
(UNSPEC_WHILELO "lo")
(UNSPEC_WHILELS "ls")
@@ -2495,7 +2911,11 @@
(UNSPEC_WHILERW "rw")
(UNSPEC_WHILEWR "wr")])
-(define_int_attr while_optab_cmp [(UNSPEC_WHILELE "le")
+(define_int_attr while_optab_cmp [(UNSPEC_WHILEGE "ge")
+ (UNSPEC_WHILEGT "gt")
+ (UNSPEC_WHILEHI "ugt")
+ (UNSPEC_WHILEHS "uge")
+ (UNSPEC_WHILELE "le")
(UNSPEC_WHILELO "ult")
(UNSPEC_WHILELS "ule")
(UNSPEC_WHILELT "lt")
@@ -2511,40 +2931,213 @@
(define_int_attr sve_pred_op [(UNSPEC_PFIRST "pfirst") (UNSPEC_PNEXT "pnext")])
-(define_int_attr sve_int_op [(UNSPEC_ANDV "andv")
- (UNSPEC_IORV "orv")
- (UNSPEC_XORV "eorv")
- (UNSPEC_UMAXV "umaxv")
- (UNSPEC_UMINV "uminv")
- (UNSPEC_SMAXV "smaxv")
- (UNSPEC_SMINV "sminv")
- (UNSPEC_SMUL_HIGHPART "smulh")
- (UNSPEC_UMUL_HIGHPART "umulh")
- (UNSPEC_ASHIFT_WIDE "lsl")
+(define_int_attr sve_int_op [(UNSPEC_ADCLB "adclb")
+ (UNSPEC_ADCLT "adclt")
+ (UNSPEC_ADDHNB "addhnb")
+ (UNSPEC_ADDHNT "addhnt")
+ (UNSPEC_ADDP "addp")
+ (UNSPEC_ANDV "andv")
(UNSPEC_ASHIFTRT_WIDE "asr")
+ (UNSPEC_ASHIFT_WIDE "lsl")
+ (UNSPEC_ASRD "asrd")
+ (UNSPEC_BDEP "bdep")
+ (UNSPEC_BEXT "bext")
+ (UNSPEC_BGRP "bgrp")
+ (UNSPEC_CADD90 "cadd")
+ (UNSPEC_CADD270 "cadd")
+ (UNSPEC_CDOT "cdot")
+ (UNSPEC_CDOT90 "cdot")
+ (UNSPEC_CDOT180 "cdot")
+ (UNSPEC_CDOT270 "cdot")
+ (UNSPEC_CMLA "cmla")
+ (UNSPEC_CMLA90 "cmla")
+ (UNSPEC_CMLA180 "cmla")
+ (UNSPEC_CMLA270 "cmla")
+ (UNSPEC_EORBT "eorbt")
+ (UNSPEC_EORTB "eortb")
+ (UNSPEC_IORV "orv")
(UNSPEC_LSHIFTRT_WIDE "lsr")
+ (UNSPEC_MATCH "match")
+ (UNSPEC_NMATCH "nmatch")
+ (UNSPEC_PMULLB "pmullb")
+ (UNSPEC_PMULLB_PAIR "pmullb")
+ (UNSPEC_PMULLT "pmullt")
+ (UNSPEC_PMULLT_PAIR "pmullt")
+ (UNSPEC_RADDHNB "raddhnb")
+ (UNSPEC_RADDHNT "raddhnt")
(UNSPEC_RBIT "rbit")
(UNSPEC_REVB "revb")
(UNSPEC_REVH "revh")
- (UNSPEC_REVW "revw")])
+ (UNSPEC_REVW "revw")
+ (UNSPEC_RSHRNB "rshrnb")
+ (UNSPEC_RSHRNT "rshrnt")
+ (UNSPEC_RSQRTE "ursqrte")
+ (UNSPEC_RSUBHNB "rsubhnb")
+ (UNSPEC_RSUBHNT "rsubhnt")
+ (UNSPEC_SABDLB "sabdlb")
+ (UNSPEC_SABDLT "sabdlt")
+ (UNSPEC_SADALP "sadalp")
+ (UNSPEC_SADDLB "saddlb")
+ (UNSPEC_SADDLBT "saddlbt")
+ (UNSPEC_SADDLT "saddlt")
+ (UNSPEC_SADDWB "saddwb")
+ (UNSPEC_SADDWT "saddwt")
+ (UNSPEC_SBCLB "sbclb")
+ (UNSPEC_SBCLT "sbclt")
+ (UNSPEC_SHADD "shadd")
+ (UNSPEC_SHRNB "shrnb")
+ (UNSPEC_SHRNT "shrnt")
+ (UNSPEC_SHSUB "shsub")
+ (UNSPEC_SLI "sli")
+ (UNSPEC_SMAXP "smaxp")
+ (UNSPEC_SMAXV "smaxv")
+ (UNSPEC_SMINP "sminp")
+ (UNSPEC_SMINV "sminv")
+ (UNSPEC_SMUL_HIGHPART "smulh")
+ (UNSPEC_SMULLB "smullb")
+ (UNSPEC_SMULLT "smullt")
+ (UNSPEC_SQCADD90 "sqcadd")
+ (UNSPEC_SQCADD270 "sqcadd")
+ (UNSPEC_SQDMULH "sqdmulh")
+ (UNSPEC_SQDMULLB "sqdmullb")
+ (UNSPEC_SQDMULLBT "sqdmullbt")
+ (UNSPEC_SQDMULLT "sqdmullt")
+ (UNSPEC_SQRDCMLAH "sqrdcmlah")
+ (UNSPEC_SQRDCMLAH90 "sqrdcmlah")
+ (UNSPEC_SQRDCMLAH180 "sqrdcmlah")
+ (UNSPEC_SQRDCMLAH270 "sqrdcmlah")
+ (UNSPEC_SQRDMLAH "sqrdmlah")
+ (UNSPEC_SQRDMLSH "sqrdmlsh")
+ (UNSPEC_SQRDMULH "sqrdmulh")
+ (UNSPEC_SQRSHL "sqrshl")
+ (UNSPEC_SQRSHRNB "sqrshrnb")
+ (UNSPEC_SQRSHRNT "sqrshrnt")
+ (UNSPEC_SQRSHRUNB "sqrshrunb")
+ (UNSPEC_SQRSHRUNT "sqrshrunt")
+ (UNSPEC_SQSHL "sqshl")
+ (UNSPEC_SQSHLU "sqshlu")
+ (UNSPEC_SQSHRNB "sqshrnb")
+ (UNSPEC_SQSHRNT "sqshrnt")
+ (UNSPEC_SQSHRUNB "sqshrunb")
+ (UNSPEC_SQSHRUNT "sqshrunt")
+ (UNSPEC_SQXTNB "sqxtnb")
+ (UNSPEC_SQXTNT "sqxtnt")
+ (UNSPEC_SQXTUNB "sqxtunb")
+ (UNSPEC_SQXTUNT "sqxtunt")
+ (UNSPEC_SRHADD "srhadd")
+ (UNSPEC_SRI "sri")
+ (UNSPEC_SRSHL "srshl")
+ (UNSPEC_SRSHR "srshr")
+ (UNSPEC_SSHLLB "sshllb")
+ (UNSPEC_SSHLLT "sshllt")
+ (UNSPEC_SSUBLB "ssublb")
+ (UNSPEC_SSUBLBT "ssublbt")
+ (UNSPEC_SSUBLT "ssublt")
+ (UNSPEC_SSUBLTB "ssubltb")
+ (UNSPEC_SSUBWB "ssubwb")
+ (UNSPEC_SSUBWT "ssubwt")
+ (UNSPEC_SUBHNB "subhnb")
+ (UNSPEC_SUBHNT "subhnt")
+ (UNSPEC_SUQADD "suqadd")
+ (UNSPEC_UABDLB "uabdlb")
+ (UNSPEC_UABDLT "uabdlt")
+ (UNSPEC_UADALP "uadalp")
+ (UNSPEC_UADDLB "uaddlb")
+ (UNSPEC_UADDLT "uaddlt")
+ (UNSPEC_UADDWB "uaddwb")
+ (UNSPEC_UADDWT "uaddwt")
+ (UNSPEC_UHADD "uhadd")
+ (UNSPEC_UHSUB "uhsub")
+ (UNSPEC_UMAXP "umaxp")
+ (UNSPEC_UMAXV "umaxv")
+ (UNSPEC_UMINP "uminp")
+ (UNSPEC_UMINV "uminv")
+ (UNSPEC_UMUL_HIGHPART "umulh")
+ (UNSPEC_UMULLB "umullb")
+ (UNSPEC_UMULLT "umullt")
+ (UNSPEC_UQRSHL "uqrshl")
+ (UNSPEC_UQRSHRNB "uqrshrnb")
+ (UNSPEC_UQRSHRNT "uqrshrnt")
+ (UNSPEC_UQSHL "uqshl")
+ (UNSPEC_UQSHRNB "uqshrnb")
+ (UNSPEC_UQSHRNT "uqshrnt")
+ (UNSPEC_UQXTNB "uqxtnb")
+ (UNSPEC_UQXTNT "uqxtnt")
+ (UNSPEC_URECPE "urecpe")
+ (UNSPEC_URHADD "urhadd")
+ (UNSPEC_URSHL "urshl")
+ (UNSPEC_URSHR "urshr")
+ (UNSPEC_USHLLB "ushllb")
+ (UNSPEC_USHLLT "ushllt")
+ (UNSPEC_USQADD "usqadd")
+ (UNSPEC_USUBLB "usublb")
+ (UNSPEC_USUBLT "usublt")
+ (UNSPEC_USUBWB "usubwb")
+ (UNSPEC_USUBWT "usubwt")
+ (UNSPEC_XORV "eorv")])
+
+(define_int_attr sve_int_op_rev [(UNSPEC_SHADD "shadd")
+ (UNSPEC_SHSUB "shsubr")
+ (UNSPEC_SQRSHL "sqrshlr")
+ (UNSPEC_SRHADD "srhadd")
+ (UNSPEC_SRSHL "srshlr")
+ (UNSPEC_UHADD "uhadd")
+ (UNSPEC_UHSUB "uhsubr")
+ (UNSPEC_UQRSHL "uqrshlr")
+ (UNSPEC_URHADD "urhadd")
+ (UNSPEC_URSHL "urshlr")])
+
+(define_int_attr sve_int_add_op [(UNSPEC_SABDLB "sabalb")
+ (UNSPEC_SABDLT "sabalt")
+ (UNSPEC_SMULLB "smlalb")
+ (UNSPEC_SMULLT "smlalt")
+ (UNSPEC_UABDLB "uabalb")
+ (UNSPEC_UABDLT "uabalt")
+ (UNSPEC_UMULLB "umlalb")
+ (UNSPEC_UMULLT "umlalt")])
+
+(define_int_attr sve_int_qadd_op [(UNSPEC_SQDMULLB "sqdmlalb")
+ (UNSPEC_SQDMULLBT "sqdmlalbt")
+ (UNSPEC_SQDMULLT "sqdmlalt")])
+
+(define_int_attr sve_int_sub_op [(UNSPEC_SMULLB "smlslb")
+ (UNSPEC_SMULLT "smlslt")
+ (UNSPEC_UMULLB "umlslb")
+ (UNSPEC_UMULLT "umlslt")])
+
+(define_int_attr sve_int_qsub_op [(UNSPEC_SQDMULLB "sqdmlslb")
+ (UNSPEC_SQDMULLBT "sqdmlslbt")
+ (UNSPEC_SQDMULLT "sqdmlslt")])
(define_int_attr sve_fp_op [(UNSPEC_FRECPE "frecpe")
(UNSPEC_FRECPS "frecps")
(UNSPEC_RSQRTE "frsqrte")
(UNSPEC_RSQRTS "frsqrts")
+ (UNSPEC_FADDP "faddp")
(UNSPEC_FADDV "faddv")
+ (UNSPEC_FMAXNMP "fmaxnmp")
(UNSPEC_FMAXNMV "fmaxnmv")
+ (UNSPEC_FMAXP "fmaxp")
(UNSPEC_FMAXV "fmaxv")
+ (UNSPEC_FMINNMP "fminnmp")
(UNSPEC_FMINNMV "fminnmv")
+ (UNSPEC_FMINP "fminp")
(UNSPEC_FMINV "fminv")
(UNSPEC_FMLA "fmla")
+ (UNSPEC_FMLALB "fmlalb")
+ (UNSPEC_FMLALT "fmlalt")
(UNSPEC_FMLS "fmls")
+ (UNSPEC_FMLSLB "fmlslb")
+ (UNSPEC_FMLSLT "fmlslt")
(UNSPEC_FEXPA "fexpa")
(UNSPEC_FTSMUL "ftsmul")
(UNSPEC_FTSSEL "ftssel")
(UNSPEC_COND_FABS "fabs")
(UNSPEC_COND_FADD "fadd")
+ (UNSPEC_COND_FCVTLT "fcvtlt")
+ (UNSPEC_COND_FCVTX "fcvtx")
(UNSPEC_COND_FDIV "fdiv")
+ (UNSPEC_COND_FLOGB "flogb")
(UNSPEC_COND_FMAX "fmax")
(UNSPEC_COND_FMAXNM "fmaxnm")
(UNSPEC_COND_FMIN "fmin")
@@ -2574,12 +3167,28 @@
(UNSPEC_COND_FMULX "fmulx")
(UNSPEC_COND_FSUB "fsubr")])
-(define_int_attr rot [(UNSPEC_FCADD90 "90")
+(define_int_attr rot [(UNSPEC_CADD90 "90")
+ (UNSPEC_CADD270 "270")
+ (UNSPEC_CDOT "0")
+ (UNSPEC_CDOT90 "90")
+ (UNSPEC_CDOT180 "180")
+ (UNSPEC_CDOT270 "270")
+ (UNSPEC_CMLA "0")
+ (UNSPEC_CMLA90 "90")
+ (UNSPEC_CMLA180 "180")
+ (UNSPEC_CMLA270 "270")
+ (UNSPEC_FCADD90 "90")
(UNSPEC_FCADD270 "270")
(UNSPEC_FCMLA "0")
(UNSPEC_FCMLA90 "90")
(UNSPEC_FCMLA180 "180")
(UNSPEC_FCMLA270 "270")
+ (UNSPEC_SQCADD90 "90")
+ (UNSPEC_SQCADD270 "270")
+ (UNSPEC_SQRDCMLAH "0")
+ (UNSPEC_SQRDCMLAH90 "90")
+ (UNSPEC_SQRDCMLAH180 "180")
+ (UNSPEC_SQRDCMLAH270 "270")
(UNSPEC_COND_FCADD90 "90")
(UNSPEC_COND_FCADD270 "270")
(UNSPEC_COND_FCMLA "0")
diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64
index 2bdb4a9..11d20b7 100644
--- a/gcc/config/aarch64/t-aarch64
+++ b/gcc/config/aarch64/t-aarch64
@@ -46,6 +46,8 @@ aarch64-builtins.o: $(srcdir)/config/aarch64/aarch64-builtins.c $(CONFIG_H) \
aarch64-sve-builtins.o: $(srcdir)/config/aarch64/aarch64-sve-builtins.cc \
$(srcdir)/config/aarch64/aarch64-sve-builtins.def \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-base.def \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.def \
$(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \
$(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) $(DIAGNOSTIC_H) \
$(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \
@@ -54,7 +56,8 @@ aarch64-sve-builtins.o: $(srcdir)/config/aarch64/aarch64-sve-builtins.cc \
stringpool.h \
$(srcdir)/config/aarch64/aarch64-sve-builtins.h \
$(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \
- $(srcdir)/config/aarch64/aarch64-sve-builtins-base.h
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-base.h \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.h
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/aarch64/aarch64-sve-builtins.cc
@@ -81,6 +84,20 @@ aarch64-sve-builtins-base.o: \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/aarch64/aarch64-sve-builtins-base.cc
+aarch64-sve-builtins-sve2.o: \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.cc \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \
+ $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) \
+ $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \
+ gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) tree-vector-builder.h \
+ rtx-vector-builder.h vec-perm-indices.h \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins.h \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.h \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-functions.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.cc
+
aarch64-builtin-iterators.h: $(srcdir)/config/aarch64/geniterators.sh \
$(srcdir)/config/aarch64/iterators.md
$(SHELL) $(srcdir)/config/aarch64/geniterators.sh \