;; Machine description for AArch64 SVE2.
;; Copyright (C) 2019-2024 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; The file is organised into the following sections (search for the full
;; line):
;;
;; == Loads
;; ---- Multi-register loads predicated by a counter
;; ---- Non-temporal gather loads
;;
;; == Stores
;; ---- Multi-register stores predicated by a counter
;; ---- Non-temporal scatter stores
;;
;; == Predicate manipulation
;; ---- [PRED] Predicate-as-counter PTRUE
;; ---- [PRED] Predicate extraction
;; ---- [PRED] Predicate selection
;; ---- [PRED] Predicate count
;;
;; == Uniform unary arithmetic
;; ---- [FP] Multi-register unary operations
;;
;; == Uniform binary arithmetic
;; ---- [INT] Multi-register operations
;; ---- [INT] Clamp to minimum/maximum
;; ---- [INT] Multiplication
;; ---- [INT] Scaled high-part multiplication
;; ---- [INT] General binary arithmetic that maps to unspecs
;; ---- [INT] Saturating binary arithmetic
;; ---- [INT] Saturating left shifts
;; ---- [FP] Clamp to minimum/maximum
;;
;; == Uniform ternary arithmetic
;; ---- [INT] General ternary arithmetic that maps to unspecs
;; ---- [INT] Multiply-and-accumulate operations
;; ---- [INT] Binary logic operations with rotation
;; ---- [INT] Ternary logic operations
;; ---- [INT] Shift-and-accumulate operations
;; ---- [INT] Shift-and-insert operations
;; ---- [INT] Sum of absolute differences
;;
;; == Extending arithmetic
;; ---- [INT] Multi-register widening conversions
;; ---- [INT] Wide binary arithmetic
;; ---- [INT] Long binary arithmetic
;; ---- [INT] Long left shifts
;; ---- [INT] Long binary arithmetic with accumulation
;; ---- [FP] Multi-register operations
;; ---- [FP] Long multiplication with accumulation
;;
;; == Narrowing arithmetic
;; ---- [INT] Narrowing unary arithmetic
;; ---- [INT] Multi-vector narrowing unary arithmetic
;; ---- [INT] Narrowing binary arithmetic
;; ---- [INT] Narrowing right shifts
;; ---- [INT] Multi-vector narrowing right shifts
;;
;; == Pairwise arithmetic
;; ---- [INT] Pairwise arithmetic
;; ---- [FP] Pairwise arithmetic
;; ---- [INT] Pairwise arithmetic with accumulation
;;
;; == Complex arithmetic
;; ---- [INT] Complex binary operations
;; ---- [INT] Complex ternary operations
;; ---- [INT] Complex dot product
;;
;; == Conversions
;; ---- [FP<-FP] Widening conversions
;; ---- [FP<-FP] Narrowing conversions
;; ---- [FP<-FP] Multi-vector narrowing conversions
;; ---- [FP<-INT] Multi-vector conversions
;; ---- [INT<-FP] Multi-vector conversions
;;
;; == Other arithmetic
;; ---- [INT] Reciprocal approximation
;; ---- [INT<-FP] Base-2 logarithm
;; ---- [INT] Polynomial multiplication
;;
;; == Comparisons and selects
;; ---- [INT,FP] Select based on predicates as counters
;; ---- [INT] While tests
;;
;; == Permutation
;; ---- [INT,FP] Reversal
;; ---- [INT,FP] General permutes
;; ---- [INT,FP] Multi-register permutes
;; ---- [INT] Optional bit-permute extensions
;;
;; == General
;; ---- Check for aliases between pointers
;; ---- Histogram processing
;; ---- String matching
;;
;; == Cryptographic extensions
;; ---- Optional AES extensions
;; ---- Optional SHA-3 extensions
;; ---- Optional SM4 extensions
;; =========================================================================
;; == Loads
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- Multi-register loads predicated by a counter
;; -------------------------------------------------------------------------
;; Includes:
;; - LD1B
;; - LD1D
;; - LD1H
;; - LD1W
;; - LDNT1B
;; - LDNT1D
;; - LDNT1H
;; - LDNT1W
;; -------------------------------------------------------------------------
;; Predicated LD1 (multi), with a count as predicate.
(define_insn "@aarch64_"
[(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw")
(unspec:SVE_FULLx24
[(match_operand:VNx16BI 2 "register_operand" "Uph")
(match_operand:SVE_FULLx24 1 "memory_operand" "m")]
LD1_COUNT))]
"TARGET_STREAMING_SME2"
"\t%0, %K2/z, %1"
[(set_attr "stride_type" "ld1_consecutive")]
)
(define_insn "@aarch64__strided2"
[(set (match_operand: 0 "aarch64_simd_register" "=Uwd")
(unspec:
[(match_operand:VNx16BI 3 "register_operand" "Uph")
(match_operand:SVE_FULLx2 2 "memory_operand" "m")
(const_int 0)]
LD1_COUNT))
(set (match_operand: 1 "aarch64_simd_register" "=w")
(unspec:
[(match_dup 3)
(match_dup 2)
(const_int 1)]
LD1_COUNT))]
"TARGET_STREAMING_SME2
&& aarch64_strided_registers_p (operands, 2, 8)"
"\t{%0., %1.}, %K3/z, %2"
[(set_attr "stride_type" "ld1_strided")]
)
(define_insn "@aarch64__strided4"
[(set (match_operand: 0 "aarch64_simd_register" "=Uwt")
(unspec:
[(match_operand:VNx16BI 5 "register_operand" "Uph")
(match_operand:SVE_FULLx4 4 "memory_operand" "m")
(const_int 0)]
LD1_COUNT))
(set (match_operand: 1 "aarch64_simd_register" "=w")
(unspec:
[(match_dup 5)
(match_dup 4)
(const_int 1)]
LD1_COUNT))
(set (match_operand: 2 "aarch64_simd_register" "=w")
(unspec:
[(match_dup 5)
(match_dup 4)
(const_int 2)]
LD1_COUNT))
(set (match_operand: 3 "aarch64_simd_register" "=w")
(unspec:
[(match_dup 5)
(match_dup 4)
(const_int 3)]
LD1_COUNT))]
"TARGET_STREAMING_SME2
&& aarch64_strided_registers_p (operands, 4, 4)"
"\t{%0., %1., %2., %3.}, %K5/z, %4"
[(set_attr "stride_type" "ld1_strided")]
)
;; -------------------------------------------------------------------------
;; ---- Non-temporal gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LDNT1B
;; - LDNT1D
;; - LDNT1H
;; - LDNT1W
;; -------------------------------------------------------------------------
;; Non-extending loads.
(define_insn "@aarch64_gather_ldnt"
[(set (match_operand:SVE_FULL_SD 0 "register_operand")
(unspec:SVE_FULL_SD
[(match_operand: 1 "register_operand")
(match_operand:DI 2 "aarch64_reg_or_zero")
(match_operand: 3 "register_operand")
(mem:BLK (scratch))]
UNSPEC_LDNT1_GATHER))]
"TARGET_SVE2 && TARGET_NON_STREAMING"
{@ [cons: =0, 1, 2, 3]
[&w, Upl, Z, w ] ldnt1\t%0., %1/z, [%3.]
[?w, Upl, Z, 0 ] ^
[&w, Upl, r, w ] ldnt1\t%0., %1/z, [%3., %2]
[?w, Upl, r, 0 ] ^
}
)
;; Extending loads.
(define_insn_and_rewrite "@aarch64_gather_ldnt_"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand")
(unspec:SVE_FULL_SDI
[(match_operand: 4 "general_operand")
(ANY_EXTEND:SVE_FULL_SDI
(unspec:SVE_PARTIAL_I
[(match_operand: 1 "register_operand")
(match_operand:DI 2 "aarch64_reg_or_zero")
(match_operand: 3 "register_operand")
(mem:BLK (scratch))]
UNSPEC_LDNT1_GATHER))]
UNSPEC_PRED_X))]
"TARGET_SVE2
&& TARGET_NON_STREAMING
&& (~ & ) == 0"
{@ [cons: =0, 1, 2, 3, 4]
[&w, Upl, Z, w, UplDnm] ldnt1\t%0., %1/z, [%3.]
[?w, Upl, Z, 0, UplDnm] ^
[&w, Upl, r, w, UplDnm] ldnt1\t%0., %1/z, [%3., %2]
[?w, Upl, r, 0, UplDnm] ^
}
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (mode);
}
)
;; =========================================================================
;; == Stores
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- Multi-register stores predicated by a counter
;; -------------------------------------------------------------------------
;; Includes:
;; - ST1B
;; - ST1D
;; - ST1H
;; - ST1W
;; - STNT1B
;; - STNT1D
;; - STNT1H
;; - STNT1W
;; -------------------------------------------------------------------------
(define_insn "@aarch64_"
[(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m")
(unspec:SVE_FULLx24
[(match_operand:VNx16BI 2 "register_operand" "Uph")
(match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw")
(match_dup 0)]
ST1_COUNT))]
"TARGET_STREAMING_SME2"
"\t%1, %K2, %0"
[(set_attr "stride_type" "st1_consecutive")]
)
(define_insn "@aarch64__strided2"
[(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m")
(unspec:SVE_FULLx24
[(match_operand:VNx16BI 1 "register_operand" "Uph")
(match_operand: 2 "aarch64_simd_register" "Uwd")
(match_operand: 3 "aarch64_simd_register" "w")
(match_dup 0)]
ST1_COUNT))]
"TARGET_STREAMING_SME2
&& aarch64_strided_registers_p (operands + 2, 2, 8)"
"\t{%2., %3.}, %K1, %0"
[(set_attr "stride_type" "st1_strided")]
)
(define_insn "@aarch64__strided4"
[(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m")
(unspec:SVE_FULLx24
[(match_operand:VNx16BI 1 "register_operand" "Uph")
(match_operand: 2 "aarch64_simd_register" "Uwt")
(match_operand: 3 "aarch64_simd_register" "w")
(match_operand: 4 "aarch64_simd_register" "w")
(match_operand: 5 "aarch64_simd_register" "w")
(match_dup 0)]
ST1_COUNT))]
"TARGET_STREAMING_SME2
&& aarch64_strided_registers_p (operands + 2, 4, 4)"
"\t{%2., %3., %4., %5.}, %K1, %0"
[(set_attr "stride_type" "st1_strided")]
)
;; -------------------------------------------------------------------------
;; ---- Non-temporal scatter stores
;; -------------------------------------------------------------------------
;; Includes scatter forms of:
;; - STNT1B
;; - STNT1D
;; - STNT1H
;; - STNT1W
;; -------------------------------------------------------------------------
;; Non-truncating stores.
(define_insn "@aarch64_scatter_stnt"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand: 0 "register_operand")
(match_operand:DI 1 "aarch64_reg_or_zero")
(match_operand: 2 "register_operand")
(match_operand:SVE_FULL_SD 3 "register_operand")]
UNSPEC_STNT1_SCATTER))]
"TARGET_SVE && TARGET_NON_STREAMING"
{@ [ cons: 0 , 1 , 2 , 3 ]
[ Upl , Z , w , w ] stnt1\t%3., %0, [%2.]
[ Upl , r , w , w ] stnt1\t%3., %0, [%2., %1]
}
)
;; Truncating stores.
(define_insn "@aarch64_scatter_stnt_"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand: 0 "register_operand")
(match_operand:DI 1 "aarch64_reg_or_zero")
(match_operand: 2 "register_operand")
(truncate:SVE_PARTIAL_I
(match_operand:SVE_FULL_SDI 3 "register_operand"))]
UNSPEC_STNT1_SCATTER))]
"TARGET_SVE2
&& TARGET_NON_STREAMING
&& (~ & ) == 0"
{@ [ cons: 0 , 1 , 2 , 3 ]
[ Upl , Z , w , w ] stnt1\t%3., %0, [%2.]
[ Upl , r , w , w ] stnt1\t%3., %0, [%2., %1]
}
)
;; =========================================================================
;; == Predicate manipulation
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [PRED] Predicate-as-counter PTRUE
;; -------------------------------------------------------------------------
;; - PTRUE (predicate-as-counter form)
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_ptrue_c"
[(set (match_operand:VNx16BI 0 "register_operand" "=Uph")
(unspec:VNx16BI [(const_int BHSD_BITS)] UNSPEC_PTRUE_C))]
"TARGET_STREAMING_SME2"
"ptrue\t%K0."
)
;; -------------------------------------------------------------------------
;; ---- [PRED] Predicate extraction
;; -------------------------------------------------------------------------
;; Includes
;; - PEXT
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_pext"
[(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
(unspec:VNx16BI
[(match_operand:VNx16BI 1 "register_operand" "Uph")
(match_operand:DI 2 "const_int_operand")
(const_int BHSD_BITS)]
UNSPEC_PEXT))]
"TARGET_STREAMING_SME2"
"pext\t%0., %K1[%2]"
)
(define_insn "@aarch64_sve_pextx2"
[(set (match_operand:VNx32BI 0 "register_operand" "=Up2")
(unspec:VNx32BI
[(match_operand:VNx16BI 1 "register_operand" "Uph")
(match_operand:DI 2 "const_int_operand")
(const_int BHSD_BITS)]
UNSPEC_PEXTx2))]
"TARGET_STREAMING_SME2"
"pext\t{%S0., %T0.}, %K1[%2]"
)
;; -------------------------------------------------------------------------
;; ---- [PRED] Predicate selection
;; -------------------------------------------------------------------------
;; Includes
;; - PSEL
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_psel"
[(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
(unspec:VNx16BI
[(match_operand:VNx16BI 1 "register_operand" "Upa")
(match_operand:VNx16BI 2 "register_operand" "Upa")
(match_operand:SI 3 "register_operand" "Ucj")
(const_int BHSD_BITS)]
UNSPEC_PSEL))]
"TARGET_STREAMING_SME2"
"psel\t%0, %1, %2.[%w3, 0]"
)
(define_insn "*aarch64_sve_psel_plus"
[(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
(unspec:VNx16BI
[(match_operand:VNx16BI 1 "register_operand" "Upa")
(match_operand:VNx16BI 2 "register_operand" "Upa")
(plus:SI
(match_operand:SI 3 "register_operand" "Ucj")
(match_operand:SI 4 "const_int_operand"))
(const_int BHSD_BITS)]
UNSPEC_PSEL))]
"TARGET_STREAMING_SME2
&& UINTVAL (operands[4]) < 128 / "
"psel\t%0, %1, %2.[%w3, %4]"
)
;; -------------------------------------------------------------------------
;; ---- [PRED] Predicate count
;; -------------------------------------------------------------------------
;; Includes
;; - CNTP (predicate as counter)
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_cntp_c"
[(set (match_operand:DI 0 "register_operand" "=r")
(unspec:DI
[(match_operand:VNx16BI 1 "register_operand" "Upa")
(match_operand:DI 2 "const_int_operand")
(const_int BHSD_BITS)]
UNSPEC_CNTP_C))]
"TARGET_STREAMING_SME2"
"cntp\t%x0, %K1., vlx%2"
)
;; =========================================================================
;; == Uniform unary arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [FP] Multi-register unary operations
;; -------------------------------------------------------------------------
;; Includes:
;; - FRINTA
;; - FRINTM
;; - FRINTN
;; - FRINTP
;; -------------------------------------------------------------------------
(define_insn "2"
[(set (match_operand:SVE_SFx24 0 "aligned_register_operand" "=Uw")
(unspec:SVE_SFx24
[(match_operand:SVE_SFx24 1 "aligned_register_operand" "Uw")]
SVE2_SFx24_UNARY))]
"TARGET_STREAMING_SME2"
"frint\t%0, %1"
)
;; =========================================================================
;; == Uniform binary arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT] Multi-register operations
;; -------------------------------------------------------------------------
;; Includes the multi-register forms of:
;; - ADD
;; - SMAX
;; - SMIN
;; - SQDMULH
;; - SRSHL
;; - UMAX
;; - UMIN
;; - URSHL
;; -------------------------------------------------------------------------
(define_expand "3"
[(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw")
(SVE_INT_BINARY_MULTI:SVE_Ix24
(match_operand:SVE_Ix24 1 "aligned_register_operand" "Uw")
(match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw")))]
"TARGET_STREAMING_SME2"
)
(define_insn "*3"
[(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw")
(SVE_INT_BINARY_MULTI:SVE_Ix24
(match_operand:SVE_Ix24 1 "aligned_register_operand" "%0")
(match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw")))]
"TARGET_STREAMING_SME2"
"\t%0, %0, %2"
)
(define_insn "@aarch64_sve_single_"
[(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw")
(SVE_INT_BINARY_SINGLE:SVE_Ix24
(match_operand:SVE_Ix24 1 "aligned_register_operand" "0")
(vec_duplicate:SVE_Ix24
(match_operand: 2 "register_operand" "x"))))]
"TARGET_STREAMING_SME2"
"\t%0, %0, %2."
)
(define_insn "@aarch64_sve_"
[(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw")
(unspec:SVE_Ix24
[(match_operand:SVE_Ix24 1 "aligned_register_operand" "%0")
(match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw")]
SVE_INT_BINARY_MULTI))]
"TARGET_STREAMING_SME2"
"\t%0, %0, %2"
)
(define_insn "@aarch64_sve_single_"
[(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw")
(unspec:SVE_Ix24
[(match_operand:SVE_Ix24 1 "aligned_register_operand" "0")
(vec_duplicate:SVE_Ix24
(match_operand: 2 "register_operand" "x"))]
SVE_INT_BINARY_MULTI))]
"TARGET_STREAMING_SME2"
"\t%0, %0, %2."
)
;; -------------------------------------------------------------------------
;; ---- [INT] Clamp to minimum/maximum
;; -------------------------------------------------------------------------
;; - SCLAMP
;; - UCLAMP
;; -------------------------------------------------------------------------
;; The minimum is applied after the maximum, which matters if the maximum
;; bound is (unexpectedly) less than the minimum bound.
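;;
;; As an illustrative sketch only (not part of the pattern), each element
;; behaves roughly like the following C, so a maximum bound HI that is
;; below the minimum bound LO yields HI rather than LO:
;;
;;   int8_t sclamp_example (int8_t x, int8_t lo, int8_t hi)
;;   {
;;     int8_t t = x > lo ? x : lo;   /* SMAX: apply the minimum bound */
;;     return t < hi ? t : hi;       /* SMIN: apply the maximum bound */
;;   }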
(define_insn "@aarch64_sve_clamp"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(:SVE_FULL_I
(USMAX:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand"))
(match_operand:SVE_FULL_I 3 "register_operand")))]
"TARGET_STREAMING_SME"
{@ [cons: =0, 1, 2, 3; attrs: movprfx]
[ w, %0, w, w; * ] clamp\t%0., %2., %3.
[ ?&w, w, w, w; yes ] movprfx\t%0, %1\;clamp\t%0., %2., %3.
}
)
(define_insn_and_split "*aarch64_sve_clamp_x"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(match_operand 4)
(:SVE_FULL_I
(unspec:SVE_FULL_I
[(match_operand 5)
(USMAX:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand"))]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 3 "register_operand"))]
UNSPEC_PRED_X))]
"TARGET_STREAMING_SME"
{@ [cons: =0, 1, 2, 3; attrs: movprfx]
[ w, %0, w, w; * ] #
[ ?&w, w, w, w; yes ] #
}
"&& true"
[(set (match_dup 0)
(:SVE_FULL_I
(USMAX:SVE_FULL_I
(match_dup 1)
(match_dup 2))
(match_dup 3)))]
)
(define_insn "@aarch64_sve_clamp_single"
[(set (match_operand:SVE_Ix24 0 "register_operand" "=Uw")
(:SVE_Ix24
(USMAX:SVE_Ix24
(match_operand:SVE_Ix24 1 "register_operand" "0")
(vec_duplicate:SVE_Ix24
(match_operand: 2 "register_operand" "w")))
(vec_duplicate:SVE_Ix24
(match_operand: 3 "register_operand" "w"))))]
"TARGET_STREAMING_SME2"
"clamp\t%0, %2., %3."
)
;; -------------------------------------------------------------------------
;; ---- [INT] Multiplication
;; -------------------------------------------------------------------------
;; Includes the lane and unpredicated forms of:
;; - MUL
;; -------------------------------------------------------------------------
(define_insn "@aarch64_mul_lane_"
[(set (match_operand:SVE_FULL_HSDI_SIMD_DI 0 "register_operand" "=w")
(mult:SVE_FULL_HSDI_SIMD_DI
(unspec:SVE_FULL_HSDI_SIMD_DI
[(match_operand:SVE_FULL_HSDI_SIMD_DI 2 "register_operand" "")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)
(match_operand:SVE_FULL_HSDI_SIMD_DI 1 "register_operand" "w")))]
"TARGET_SVE2"
"mul\t%Z0., %Z1., %Z2.[%3]"
)
;; The 2nd and 3rd alternatives are valid for just TARGET_SVE as well but
;; we include them here to allow matching simpler, unpredicated RTL.
(define_insn "*aarch64_mul_unpredicated_"
[(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
(mult:SVE_I_SIMD_DI
(match_operand:SVE_I_SIMD_DI 1 "register_operand")
(match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand")))]
"TARGET_SVE2"
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
[ w , w , w ; * ] mul\t%Z0., %Z1., %Z2.
[ w , 0 , vsm ; * ] mul\t%Z0., %Z0., #%2
[ ?&w , w , vsm ; yes ] movprfx\t%Z0, %Z1\;mul\t%Z0., %Z0., #%2
}
)
;; -------------------------------------------------------------------------
;; ---- [INT] Scaled high-part multiplication
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------
;; Unpredicated integer multiply-high-with-(round-and-)scale.
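;;
;; As a rough sketch of what the expansion below computes (illustrative
;; only), for 16-bit elements the smulhrs variant is approximately:
;;
;;   int16_t smulhrs_example (int16_t x, int16_t y)
;;   {
;;     int32_t prod = (int32_t) x * y;                  /* SMULLB/SMULLT */
;;     return (int16_t) ((prod + (1 << 14)) >> 15);     /* RSHRNB/RSHRNT */
;;   }
;;
;; The non-rounding smulhs/umulhs forms omit the 1 << 14 addend and use
;; the non-rounding narrowing shifts instead.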
(define_expand "mulhs3"
[(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
(unspec:SVE_FULL_BHSI
[(match_dup 3)
(unspec:SVE_FULL_BHSI
[(match_operand:SVE_FULL_BHSI 1 "register_operand")
(match_operand:SVE_FULL_BHSI 2 "register_operand")]
MULHRS)]
UNSPEC_PRED_X))]
"TARGET_SVE2"
{
operands[3] = aarch64_ptrue_reg (mode);
rtx prod_b = gen_reg_rtx (mode);
rtx prod_t = gen_reg_rtx (mode);
emit_insn (gen_aarch64_sve_mullb (prod_b, operands[1],
operands[2]));
emit_insn (gen_aarch64_sve_mullt (prod_t, operands[1],
operands[2]));
rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1);
emit_insn (gen_aarch64_sve_shrnb (operands[0], prod_b, shift));
emit_insn (gen_aarch64_sve_shrnt (operands[0], operands[0],
prod_t, shift));
DONE;
}
)
;; -------------------------------------------------------------------------
;; ---- [INT] General binary arithmetic that maps to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - SHADD
;; - SHSUB
;; - SHSUBR
;; - SQRSHL
;; - SQRSHLR
;; - SRHADD
;; - SRSHL
;; - SRSHLR
;; - SUQADD
;; - UHADD
;; - UHSUB
;; - UHSUBR
;; - UQRSHL
;; - UQRSHLR
;; - URHADD
;; - URSHL
;; - URSHLR
;; - USQADD
;; -------------------------------------------------------------------------
;; Integer average (floor).
(define_expand "avg3_floor"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(match_dup 3)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand")]
HADD)]
UNSPEC_PRED_X))]
"TARGET_SVE2"
{
operands[3] = force_reg (mode, CONSTM1_RTX (mode));
}
)
;; Integer average (rounding).
(define_expand "avg3_ceil"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(match_dup 3)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand")]
RHADD)]
UNSPEC_PRED_X))]
"TARGET_SVE2"
{
operands[3] = force_reg (mode, CONSTM1_RTX (mode));
}
)
;; The immediate form of SQADD acts as an immediate form of SUQADD
;; over its full range. In contrast to the ss_plus pattern, we do
;; not need to treat byte immediates specially. E.g.:
;;
;; SQADD Z0.B, Z0.B, #128
;;
;; is equivalent to:
;;
;; MOV Z1.B, #128
;; SUQADD Z0.B, P0/M, Z0.B, Z1.B
;;
;; even though it's not equivalent to:
;;
;; MOV Z1.B, #128
;; SQADD Z0.B, P0/M, Z0.B, Z1.B // Saturating subtraction of 128
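;;
;; As a worked example (illustrative only): for an element value of 100,
;; SQADD with immediate #128 behaves like SUQADD and gives
;; min (100 + 128, 127) = 127, whereas the register form of SQADD would
;; interpret the operand as -128 and give 100 - 128 = -28.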
(define_insn "@aarch64_sve_suqadd_const"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
(match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")]
UNSPEC_SUQADD))]
"TARGET_SVE2"
"@
sqadd\t%0., %0., #%D2
movprfx\t%0, %1\;sqadd\t%0., %0., #%D2"
[(set_attr "movprfx" "*,yes")]
)
;; General predicated binary arithmetic. All operations handled here
;; are commutative or have a reversed form.
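;; For example, SRSHL (shift the first input by the second) pairs with
;; SRSHLR (operands reversed), so either input can be tied to the
;; destination register.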
(define_insn "@aarch64_pred_"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(match_operand: 1 "register_operand")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand")
(match_operand:SVE_FULL_I 3 "register_operand")]
SVE2_COND_INT_BINARY_REV)]
UNSPEC_PRED_X))]
"TARGET_SVE2"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , Upl , 0 , w ; * ] \t%0., %1/m, %0., %3.
[ w , Upl , w , 0 ; * ] \t%0., %1/m, %0., %2.
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;\t%0., %1/m, %0., %3.
}
)
;; Predicated binary arithmetic with merging.
(define_expand "@cond_"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(match_operand: 1 "register_operand")
(unspec:SVE_FULL_I
[(match_dup 5)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand")
(match_operand:SVE_FULL_I 3 "register_operand")]
SVE2_COND_INT_BINARY)]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE2"
{
operands[5] = CONSTM1_RTX (mode);
}
)
;; Predicated binary arithmetic, merging with the first input.
(define_insn_and_rewrite "*cond__2"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(match_operand: 1 "register_operand")
(unspec:SVE_FULL_I
[(match_operand 4)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand")
(match_operand:SVE_FULL_I 3 "register_operand")]
SVE2_COND_INT_BINARY)]
UNSPEC_PRED_X)
(match_dup 2)]
UNSPEC_SEL))]
"TARGET_SVE2"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , Upl , 0 , w ; * ] \t%0., %1/m, %0., %3.
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;\t%0., %1/m, %0., %3.
}
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (mode);
}
)
;; Predicated binary arithmetic, merging with the second input.
(define_insn_and_rewrite "*cond__3"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(match_operand: 1 "register_operand")
(unspec:SVE_FULL_I
[(match_operand 4)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand")
(match_operand:SVE_FULL_I 3 "register_operand")]
SVE2_COND_INT_BINARY_REV)]
UNSPEC_PRED_X)
(match_dup 3)]
UNSPEC_SEL))]
"TARGET_SVE2"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , Upl , w , 0 ; * ] \t%0., %1/m, %0., %2.
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;\t%0., %1/m, %0.