author    Richard Sandiford <richard.sandiford@arm.com>  2019-11-16 11:26:11 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org>       2019-11-16 11:26:11 +0000
commit    87a80d27218f2325d05adc5382abe7d582062306 (patch)
tree      cad94262d891bb42c12279735065fc6e128a6e54 /gcc/config
parent    f8186eeaf3567bdaad38b5da577c8021f8588886 (diff)
[AArch64] Pattern-match SVE extending gather loads
This patch pattern-matches a partial gather load followed by a sign or
zero extension into an extending gather load.  (The partial gather load
is already an extending load; we just don't rely on the upper bits of
the elements.)

2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/iterators.md (SVE_2BHSI, SVE_2HSDI, SVE_4BHI)
	(SVE_4HSI): New mode iterators.
	(ANY_EXTEND2): New code iterator.
	* config/aarch64/aarch64-sve.md
	(@aarch64_gather_load_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>):
	Extend to...
	(@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>):
	...this, handling extension to partial modes as well as full modes.
	Describe the extension as a predicated rather than unpredicated
	extension.
	(@aarch64_gather_load_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>):
	Likewise extend to...
	(@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>):
	...this, making the same adjustments.
	(*aarch64_gather_load_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw):
	Likewise extend to...
	(*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw):
	...this, making the same adjustments.
	(*aarch64_gather_load_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw):
	Likewise extend to...
	(*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw):
	...this, making the same adjustments.
	(*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked):
	New pattern.
	(*aarch64_ldff1_gather<mode>_sxtw): Canonicalize to a constant
	extension predicate.
	(@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>)
	(@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>)
	(*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw):
	Describe the extension as a predicated rather than unpredicated
	extension.
	(*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw):
	Likewise.  Canonicalize to a constant extension predicate.
	* config/aarch64/aarch64-sve-builtins-base.cc
	(svld1_gather_extend_impl::expand): Add an extra predicate for the
	extension.
	(svldff1_gather_extend_impl::expand): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/sve/gather_load_extend_1.c: New test.
	* gcc.target/aarch64/sve/gather_load_extend_2.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_extend_3.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_extend_4.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_extend_5.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_extend_6.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_extend_7.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_extend_8.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_extend_9.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_extend_10.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_extend_11.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_extend_12.c: Likewise.

From-SVN: r278346
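
As an informal illustration (a hypothetical sketch, not one of the gather_load_extend_*.c tests, whose contents are not part of this diff), a loop of the following shape is the kind of code the new patterns target: the gathered uint8_t values only occupy part of each 64-bit vector element, and the zero extension to uint16_t can now be matched as part of the gather load rather than being emitted as a separate extension.  This assumes SVE vectorization is enabled (for example -O3 -march=armv8.2-a+sve); the function name and type choices below are illustrative only.

/* Hypothetical example: gather bytes through 64-bit indices and
   accumulate them into 16-bit elements.  The load of src[index[i]] is a
   partial gather of uint8_t values; its zero extension to uint16_t is
   the kind of extension the new SVE_2HSDI/SVE_2BHSI patterns fold into
   the gather load itself.  */
#include <stdint.h>

void
sum_gathered_bytes (uint16_t *restrict dst, const uint8_t *restrict src,
                    const uint64_t *restrict index, int n)
{
  for (int i = 0; i < n; ++i)
    dst[i] += src[index[i]];
}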
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.cc |   4
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md               | 338
-rw-r--r--  gcc/config/aarch64/iterators.md                 |  13
3 files changed, 227 insertions(+), 128 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index e12882f..52166c4 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1097,6 +1097,8 @@ public:
/* Put the predicate last, since the extending gathers use the same
operand order as mask_gather_load_optab. */
e.rotate_inputs_left (0, 5);
+ /* Add a constant predicate for the extension rtx. */
+ e.args.quick_push (CONSTM1_RTX (VNx16BImode));
insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (),
e.vector_mode (0),
e.memory_vector_mode ());
@@ -1234,6 +1236,8 @@ public:
/* Put the predicate last, since ldff1_gather uses the same operand
order as mask_gather_load_optab. */
e.rotate_inputs_left (0, 5);
+ /* Add a constant predicate for the extension rtx. */
+ e.args.quick_push (CONSTM1_RTX (VNx16BImode));
insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (),
e.vector_mode (0),
e.memory_vector_mode ());
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index e26ac45..1dcbb4b 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1446,93 +1446,150 @@
;; Predicated extending gather loads for 32-bit elements. Operand 3 is
;; true for unsigned extension and false for signed extension.
-(define_insn "@aarch64_gather_load_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
- [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
- (ANY_EXTEND:VNx4_WIDE
- (unspec:VNx4_NARROW
- [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
- (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
- (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
- (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
- (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
- (mem:BLK (scratch))]
- UNSPEC_LD1_GATHER)))]
- "TARGET_SVE"
- "@
- ld1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
- ld1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
- ld1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
- ld1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
- ld1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
- ld1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
+(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
+ [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w")
+ (unspec:SVE_4HSI
+ [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
+ (ANY_EXTEND:SVE_4HSI
+ (unspec:SVE_4BHI
+ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
+ (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk")
+ (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
+ (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
+ (mem:BLK (scratch))]
+ UNSPEC_LD1_GATHER))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
+ "@
+ ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
+ ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
+ ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
+ ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
+ ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
+ ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
+ "&& !CONSTANT_P (operands[6])"
+ {
+ operands[6] = CONSTM1_RTX (VNx4BImode);
+ }
)
;; Predicated extending gather loads for 64-bit elements. The value of
;; operand 3 doesn't matter in this case.
-(define_insn "@aarch64_gather_load_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
- [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
- (ANY_EXTEND:VNx2_WIDE
- (unspec:VNx2_NARROW
- [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
- (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
- (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
- (match_operand:DI 3 "const_int_operand")
- (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
- (mem:BLK (scratch))]
- UNSPEC_LD1_GATHER)))]
- "TARGET_SVE"
+(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
+ [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w")
+ (unspec:SVE_2HSDI
+ [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
+ (ANY_EXTEND:SVE_2HSDI
+ (unspec:SVE_2BHSI
+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
+ (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk")
+ (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
+ (match_operand:DI 3 "const_int_operand")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i")
+ (mem:BLK (scratch))]
+ UNSPEC_LD1_GATHER))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
"@
- ld1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
- ld1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
- ld1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
- ld1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
+ ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
+ ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
+ ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
+ ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
+ "&& !CONSTANT_P (operands[6])"
+ {
+ operands[6] = CONSTM1_RTX (VNx2BImode);
+ }
)
-;; Likewise, but with the offset being sign-extended from 32 bits.
-(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
- [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
- (ANY_EXTEND:VNx2_WIDE
- (unspec:VNx2_NARROW
- [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
- (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
- (unspec:VNx2DI
- [(match_operand 6)
- (sign_extend:VNx2DI
- (truncate:VNx2SI
- (match_operand:VNx2DI 2 "register_operand" "w, w")))]
- UNSPEC_PRED_X)
- (match_operand:DI 3 "const_int_operand")
- (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
- (mem:BLK (scratch))]
- UNSPEC_LD1_GATHER)))]
- "TARGET_SVE"
+;; Likewise, but with the offset being extended from 32 bits.
+(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
+ [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
+ (unspec:SVE_2HSDI
+ [(match_operand 6)
+ (ANY_EXTEND:SVE_2HSDI
+ (unspec:SVE_2BHSI
+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
+ (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
+ (unspec:VNx2DI
+ [(match_operand 7)
+ (ANY_EXTEND2:VNx2DI
+ (match_operand:VNx2SI 2 "register_operand" "w, w"))]
+ UNSPEC_PRED_X)
+ (match_operand:DI 3 "const_int_operand")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
+ (mem:BLK (scratch))]
+ UNSPEC_LD1_GATHER))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
"@
- ld1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
- ld1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
- "&& !rtx_equal_p (operands[5], operands[6])"
+ ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
+ ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]"
+ "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
{
- operands[6] = copy_rtx (operands[5]);
+ operands[6] = CONSTM1_RTX (VNx2BImode);
+ operands[7] = CONSTM1_RTX (VNx2BImode);
}
)
-;; Likewise, but with the offset being zero-extended from 32 bits.
-(define_insn "*aarch64_gather_load_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
- [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
- (ANY_EXTEND:VNx2_WIDE
- (unspec:VNx2_NARROW
- [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
- (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
- (and:VNx2DI
- (match_operand:VNx2DI 2 "register_operand" "w, w")
- (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
- (match_operand:DI 3 "const_int_operand")
- (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
- (mem:BLK (scratch))]
- UNSPEC_LD1_GATHER)))]
- "TARGET_SVE"
+;; Likewise, but with the offset being truncated to 32 bits and then
+;; sign-extended.
+(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
+ [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
+ (unspec:SVE_2HSDI
+ [(match_operand 6)
+ (ANY_EXTEND:SVE_2HSDI
+ (unspec:SVE_2BHSI
+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
+ (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
+ (unspec:VNx2DI
+ [(match_operand 7)
+ (sign_extend:VNx2DI
+ (truncate:VNx2SI
+ (match_operand:VNx2DI 2 "register_operand" "w, w")))]
+ UNSPEC_PRED_X)
+ (match_operand:DI 3 "const_int_operand")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
+ (mem:BLK (scratch))]
+ UNSPEC_LD1_GATHER))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
+ "@
+ ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
+ ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
+ "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
+ {
+ operands[6] = CONSTM1_RTX (VNx2BImode);
+ operands[7] = CONSTM1_RTX (VNx2BImode);
+ }
+)
+
+;; Likewise, but with the offset being truncated to 32 bits and then
+;; zero-extended.
+(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
+ [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
+ (unspec:SVE_2HSDI
+ [(match_operand 7)
+ (ANY_EXTEND:SVE_2HSDI
+ (unspec:SVE_2BHSI
+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
+ (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
+ (and:VNx2DI
+ (match_operand:VNx2DI 2 "register_operand" "w, w")
+ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
+ (match_operand:DI 3 "const_int_operand")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
+ (mem:BLK (scratch))]
+ UNSPEC_LD1_GATHER))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
"@
- ld1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
- ld1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
+ ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
+ ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
+ "&& !CONSTANT_P (operands[7])"
+ {
+ operands[7] = CONSTM1_RTX (VNx2BImode);
+ }
)
;; -------------------------------------------------------------------------
@@ -1608,9 +1665,9 @@
"@
ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
- "&& !rtx_equal_p (operands[5], operands[6])"
+ "&& !CONSTANT_P (operands[6])"
{
- operands[6] = copy_rtx (operands[5]);
+ operands[6] = CONSTM1_RTX (VNx2BImode);
}
)
@@ -1648,18 +1705,21 @@
;; Predicated extending first-faulting gather loads for 32-bit elements.
;; Operand 3 is true for unsigned extension and false for signed extension.
-(define_insn "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
+(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
[(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
- (ANY_EXTEND:VNx4_WIDE
- (unspec:VNx4_NARROW
- [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
- (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
- (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
- (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
- (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
- (mem:BLK (scratch))
- (reg:VNx16BI FFRT_REGNUM)]
- UNSPEC_LDFF1_GATHER)))]
+ (unspec:VNx4_WIDE
+ [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
+ (ANY_EXTEND:VNx4_WIDE
+ (unspec:VNx4_NARROW
+ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
+ (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
+ (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
+ (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
+ (mem:BLK (scratch))
+ (reg:VNx16BI FFRT_REGNUM)]
+ UNSPEC_LDFF1_GATHER))]
+ UNSPEC_PRED_X))]
"TARGET_SVE"
"@
ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
@@ -1668,77 +1728,99 @@
ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
+ "&& !CONSTANT_P (operands[6])"
+ {
+ operands[6] = CONSTM1_RTX (VNx4BImode);
+ }
)
;; Predicated extending first-faulting gather loads for 64-bit elements.
;; The value of operand 3 doesn't matter in this case.
-(define_insn "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
+(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
[(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
- (ANY_EXTEND:VNx2_WIDE
- (unspec:VNx2_NARROW
- [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
- (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
- (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
- (match_operand:DI 3 "const_int_operand")
- (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
- (mem:BLK (scratch))
- (reg:VNx16BI FFRT_REGNUM)]
- UNSPEC_LDFF1_GATHER)))]
+ (unspec:VNx2_WIDE
+ [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
+ (ANY_EXTEND:VNx2_WIDE
+ (unspec:VNx2_NARROW
+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
+ (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
+ (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
+ (match_operand:DI 3 "const_int_operand")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
+ (mem:BLK (scratch))
+ (reg:VNx16BI FFRT_REGNUM)]
+ UNSPEC_LDFF1_GATHER))]
+ UNSPEC_PRED_X))]
"TARGET_SVE"
"@
ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
+ "&& !CONSTANT_P (operands[6])"
+ {
+ operands[6] = CONSTM1_RTX (VNx2BImode);
+ }
)
;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
[(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
- (ANY_EXTEND:VNx2_WIDE
- (unspec:VNx2_NARROW
- [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
- (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
- (unspec:VNx2DI
- [(match_operand 6)
- (sign_extend:VNx2DI
- (truncate:VNx2SI
- (match_operand:VNx2DI 2 "register_operand" "w, w")))]
- UNSPEC_PRED_X)
- (match_operand:DI 3 "const_int_operand")
- (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
- (mem:BLK (scratch))
- (reg:VNx16BI FFRT_REGNUM)]
- UNSPEC_LDFF1_GATHER)))]
+ (unspec:VNx2_WIDE
+ [(match_operand 6)
+ (ANY_EXTEND:VNx2_WIDE
+ (unspec:VNx2_NARROW
+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
+ (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
+ (unspec:VNx2DI
+ [(match_operand 7)
+ (sign_extend:VNx2DI
+ (truncate:VNx2SI
+ (match_operand:VNx2DI 2 "register_operand" "w, w")))]
+ UNSPEC_PRED_X)
+ (match_operand:DI 3 "const_int_operand")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
+ (mem:BLK (scratch))
+ (reg:VNx16BI FFRT_REGNUM)]
+ UNSPEC_LDFF1_GATHER))]
+ UNSPEC_PRED_X))]
"TARGET_SVE"
"@
ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
- "&& !rtx_equal_p (operands[5], operands[6])"
+ "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
{
- operands[6] = copy_rtx (operands[5]);
+ operands[6] = CONSTM1_RTX (VNx2BImode);
+ operands[7] = CONSTM1_RTX (VNx2BImode);
}
)
;; Likewise, but with the offset being zero-extended from 32 bits.
-(define_insn "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
+(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
[(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
- (ANY_EXTEND:VNx2_WIDE
- (unspec:VNx2_NARROW
- [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
- (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
- (and:VNx2DI
- (match_operand:VNx2DI 2 "register_operand" "w, w")
- (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
- (match_operand:DI 3 "const_int_operand")
- (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
- (mem:BLK (scratch))
- (reg:VNx16BI FFRT_REGNUM)]
- UNSPEC_LDFF1_GATHER)))]
+ (unspec:VNx2_WIDE
+ [(match_operand 7)
+ (ANY_EXTEND:VNx2_WIDE
+ (unspec:VNx2_NARROW
+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
+ (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
+ (and:VNx2DI
+ (match_operand:VNx2DI 2 "register_operand" "w, w")
+ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
+ (match_operand:DI 3 "const_int_operand")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
+ (mem:BLK (scratch))
+ (reg:VNx16BI FFRT_REGNUM)]
+ UNSPEC_LDFF1_GATHER))]
+ UNSPEC_PRED_X))]
"TARGET_SVE"
"@
ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
+ "&& !CONSTANT_P (operands[7])"
+ {
+ operands[7] = CONSTM1_RTX (VNx2BImode);
+ }
)
;; =========================================================================
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index c5b0fa7..bfeebe9 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -371,9 +371,21 @@
;; SVE modes with 2 elements.
(define_mode_iterator SVE_2 [VNx2QI VNx2HI VNx2HF VNx2SI VNx2SF VNx2DI VNx2DF])
+;; SVE integer modes with 2 elements, excluding the widest element.
+(define_mode_iterator SVE_2BHSI [VNx2QI VNx2HI VNx2SI])
+
+;; SVE integer modes with 2 elements, excluding the narrowest element.
+(define_mode_iterator SVE_2HSDI [VNx2HI VNx2SI VNx2DI])
+
;; SVE modes with 4 elements.
(define_mode_iterator SVE_4 [VNx4QI VNx4HI VNx4HF VNx4SI VNx4SF])
+;; SVE integer modes with 4 elements, excluding the widest element.
+(define_mode_iterator SVE_4BHI [VNx4QI VNx4HI])
+
+;; SVE integer modes with 4 elements, excluding the narrowest element.
+(define_mode_iterator SVE_4HSI [VNx4HI VNx4SI])
+
;; Modes involved in extending or truncating SVE data, for 8 elements per
;; 128-bit block.
(define_mode_iterator VNx8_NARROW [VNx8QI])
@@ -1443,6 +1455,7 @@
;; Code iterator for sign/zero extension
(define_code_iterator ANY_EXTEND [sign_extend zero_extend])
+(define_code_iterator ANY_EXTEND2 [sign_extend zero_extend])
;; All division operations (signed/unsigned)
(define_code_iterator ANY_DIV [div udiv])