aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorTamar Christina <tamar.christina@arm.com>2024-01-24 15:58:34 +0000
committerTamar Christina <tamar.christina@arm.com>2024-01-24 15:58:34 +0000
commitdfa17fd3b1a50cab51803e8a63c5c7b7db173523 (patch)
tree1661c6c500337e264e8b61c35905025414c89131 /gcc
parent306713c953d509720dc394c43c0890548bb0ae07 (diff)
downloadgcc-dfa17fd3b1a50cab51803e8a63c5c7b7db173523.zip
gcc-dfa17fd3b1a50cab51803e8a63c5c7b7db173523.tar.gz
gcc-dfa17fd3b1a50cab51803e8a63c5c7b7db173523.tar.bz2
AArch64: Fix expansion of Advanced SIMD div and mul using SVE [PR109636]
As suggested in the ticket this replaces the expansion by converting the Advanced SIMD types to SVE types by simply printing out an SVE register for these instructions. This fixes the subreg issues since there are no subregs involved anymore. gcc/ChangeLog: PR target/109636 * config/aarch64/aarch64-simd.md (<su_optab>div<mode>3, mulv2di3): Remove. * config/aarch64/iterators.md (VQDIV): Remove. (SVE_FULL_SDI_SIMD, SVE_FULL_HSDI_SIMD_DI, SVE_I_SIMD_DI): New. (VPRED, sve_lane_con): Add V4SI and V2DI. * config/aarch64/aarch64-sve.md (<optab><mode>3, @aarch64_pred_<optab><mode>): Support Advanced SIMD types. (mul<mode>3): New, split from <optab><mode>3. (@aarch64_pred_<optab><mode>, *post_ra_<optab><mode>3): New. * config/aarch64/aarch64-sve2.md (@aarch64_mul_lane_<mode>, *aarch64_mul_unpredicated_<mode>): Change SVE_FULL_HSDI to SVE_FULL_HSDI_SIMD_DI. gcc/testsuite/ChangeLog: PR target/109636 * gcc.target/aarch64/sve/pr109636_1.c: New test. * gcc.target/aarch64/sve/pr109636_2.c: New test. * gcc.target/aarch64/sve2/pr109636_1.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/aarch64/aarch64-simd.md41
-rw-r--r--gcc/config/aarch64/aarch64-sve.md80
-rw-r--r--gcc/config/aarch64/aarch64-sve2.md26
-rw-r--r--gcc/config/aarch64/iterators.md19
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c13
7 files changed, 118 insertions, 87 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 6f48b4d..556d0cf 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -389,26 +389,6 @@
[(set_attr "type" "neon_mul_<Vetype><q>")]
)
-;; Advanced SIMD does not support vector DImode MUL, but SVE does.
-;; Make use of the overlap between Z and V registers to implement the V2DI
-;; optab for TARGET_SVE. The mulvnx2di3 expander can
-;; handle the TARGET_SVE2 case transparently.
-(define_expand "mulv2di3"
- [(set (match_operand:V2DI 0 "register_operand")
- (mult:V2DI (match_operand:V2DI 1 "register_operand")
- (match_operand:V2DI 2 "aarch64_sve_vsm_operand")))]
- "TARGET_SVE"
- {
- machine_mode sve_mode = VNx2DImode;
- rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], V2DImode, 0);
- rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], V2DImode, 0);
- rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], V2DImode, 0);
-
- emit_insn (gen_mulvnx2di3 (sve_op0, sve_op1, sve_op2));
- DONE;
- }
-)
-
(define_insn "bswap<mode>2"
[(set (match_operand:VDQHSD 0 "register_operand" "=w")
(bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
@@ -2678,27 +2658,6 @@
[(set_attr "type" "neon_fp_div_<stype><q>")]
)
-;; SVE has vector integer divisions, unlike Advanced SIMD.
-;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
-;; optabs to the midend.
-(define_expand "<su_optab>div<mode>3"
- [(set (match_operand:VQDIV 0 "register_operand")
- (ANY_DIV:VQDIV
- (match_operand:VQDIV 1 "register_operand")
- (match_operand:VQDIV 2 "register_operand")))]
- "TARGET_SVE"
- {
- machine_mode sve_mode
- = aarch64_full_sve_mode (GET_MODE_INNER (<MODE>mode)).require ();
- rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], <MODE>mode, 0);
- rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], <MODE>mode, 0);
- rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], <MODE>mode, 0);
-
- emit_insn (gen_<su_optab>div<vnx>3 (sve_op0, sve_op1, sve_op2));
- DONE;
- }
-)
-
(define_insn "neg<mode>2<vczle><vczbe>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index e1e3c1b..eca8623 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3789,16 +3789,35 @@
[(set (match_operand:SVE_I 0 "register_operand")
(unspec:SVE_I
[(match_dup 3)
- (SVE_INT_BINARY_IMM:SVE_I
+ (SVE_INT_BINARY_MULTI:SVE_I
(match_operand:SVE_I 1 "register_operand")
(match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
{
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; Unpredicated integer binary operations that have an immediate form.
+;; Advanced SIMD does not support vector DImode MUL, but SVE does.
+;; Make use of the overlap between Z and V registers to implement the V2DI
+;; optab for TARGET_SVE. The mulvnx2di3 expander can
+;; handle the TARGET_SVE2 case transparently.
+(define_expand "mul<mode>3"
+ [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
+ (unspec:SVE_I_SIMD_DI
+ [(match_dup 3)
+ (mult:SVE_I_SIMD_DI
+ (match_operand:SVE_I_SIMD_DI 1 "register_operand")
+ (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand"))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE"
+ {
/* SVE2 supports the MUL (vectors, unpredicated) form. Emit the simple
pattern for it here rather than splitting off the MULT expander
separately. */
- if (TARGET_SVE2 && <CODE> == MULT)
+ if (TARGET_SVE2)
{
emit_move_insn (operands[0], gen_rtx_MULT (<MODE>mode,
operands[1], operands[2]));
@@ -3814,26 +3833,26 @@
;; and would make the instruction seem less uniform to the register
;; allocator.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
+ [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
+ (unspec:SVE_I_SIMD_DI
[(match_operand:<VPRED> 1 "register_operand")
- (SVE_INT_BINARY_IMM:SVE_I
- (match_operand:SVE_I 2 "register_operand")
- (match_operand:SVE_I 3 "aarch64_sve_<sve_imm_con>_operand"))]
+ (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
+ (match_operand:SVE_I_SIMD_DI 2 "register_operand")
+ (match_operand:SVE_I_SIMD_DI 3 "aarch64_sve_<sve_imm_con>_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , Upl , %0 , <sve_imm_con> ; * ] #
- [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ [ w , Upl , 0 , w ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
[ ?&w , Upl , w , <sve_imm_con> ; yes ] #
- [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
}
; Split the unpredicated form after reload, so that we don't have
; the unnecessary PTRUE.
"&& reload_completed
&& !register_operand (operands[3], <MODE>mode)"
[(set (match_dup 0)
- (SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))]
+ (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI (match_dup 2) (match_dup 3)))]
""
)
@@ -3841,14 +3860,14 @@
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_<optab><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (SVE_INT_BINARY_IMM:SVE_I
- (match_operand:SVE_I 1 "register_operand" "0, w")
- (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
+ [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand" "=w, ?&w")
+ (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
+ (match_operand:SVE_I_SIMD_DI 1 "register_operand" "0, w")
+ (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_<sve_imm_con>_immediate")))]
"TARGET_SVE && reload_completed"
"@
- <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
- movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
+ <sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2
+ movprfx\t%Z0, %Z1\;<sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2"
[(set_attr "movprfx" "*,yes")]
)
@@ -4458,13 +4477,16 @@
;; -------------------------------------------------------------------------
;; Unpredicated integer division.
+;; SVE has vector integer divisions, unlike Advanced SIMD.
+;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
+;; optabs to the midend.
(define_expand "<optab><mode>3"
- [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
- (unspec:SVE_FULL_SDI
+ [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
+ (unspec:SVE_FULL_SDI_SIMD
[(match_dup 3)
- (SVE_INT_BINARY_SD:SVE_FULL_SDI
- (match_operand:SVE_FULL_SDI 1 "register_operand")
- (match_operand:SVE_FULL_SDI 2 "register_operand"))]
+ (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
+ (match_operand:SVE_FULL_SDI_SIMD 1 "register_operand")
+ (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
{
@@ -4474,18 +4496,18 @@
;; Integer division predicated with a PTRUE.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
- (unspec:SVE_FULL_SDI
+ [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
+ (unspec:SVE_FULL_SDI_SIMD
[(match_operand:<VPRED> 1 "register_operand")
- (SVE_INT_BINARY_SD:SVE_FULL_SDI
- (match_operand:SVE_FULL_SDI 2 "register_operand")
- (match_operand:SVE_FULL_SDI 3 "register_operand"))]
+ (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
+ (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand")
+ (match_operand:SVE_FULL_SDI_SIMD 3 "register_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- [ w , Upl , w , 0 ; * ] <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ [ w , Upl , 0 , w ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
+ [ w , Upl , w , 0 ; * ] <sve_int_op>r\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z2.<Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
}
)
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 1d1eb8b..934e570 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -615,29 +615,29 @@
;; -------------------------------------------------------------------------
(define_insn "@aarch64_mul_lane_<mode>"
- [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
- (mult:SVE_FULL_HSDI
- (unspec:SVE_FULL_HSDI
- [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
+ [(set (match_operand:SVE_FULL_HSDI_SIMD_DI 0 "register_operand" "=w")
+ (mult:SVE_FULL_HSDI_SIMD_DI
+ (unspec:SVE_FULL_HSDI_SIMD_DI
+ [(match_operand:SVE_FULL_HSDI_SIMD_DI 2 "register_operand" "<sve_lane_con>")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)
- (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))]
+ (match_operand:SVE_FULL_HSDI_SIMD_DI 1 "register_operand" "w")))]
"TARGET_SVE2"
- "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
+ "mul\t%Z0.<Vetype>, %Z1.<Vetype>, %Z2.<Vetype>[%3]"
)
;; The 2nd and 3rd alternatives are valid for just TARGET_SVE as well but
;; we include them here to allow matching simpler, unpredicated RTL.
(define_insn "*aarch64_mul_unpredicated_<mode>"
- [(set (match_operand:SVE_I 0 "register_operand")
- (mult:SVE_I
- (match_operand:SVE_I 1 "register_operand")
- (match_operand:SVE_I 2 "aarch64_sve_vsm_operand")))]
+ [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
+ (mult:SVE_I_SIMD_DI
+ (match_operand:SVE_I_SIMD_DI 1 "register_operand")
+ (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand")))]
"TARGET_SVE2"
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , w , w ; * ] mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
- [ w , 0 , vsm ; * ] mul\t%0.<Vetype>, %0.<Vetype>, #%2
- [ ?&w , w , vsm ; yes ] movprfx\t%0, %1\;mul\t%0.<Vetype>, %0.<Vetype>, #%2
+ [ w , w , w ; * ] mul\t%Z0.<Vetype>, %Z1.<Vetype>, %Z2.<Vetype>
+ [ w , 0 , vsm ; * ] mul\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
+ [ ?&w , w , vsm ; yes ] movprfx\t%Z0, %Z1\;mul\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
}
)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 942270e..99cde46 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -108,9 +108,6 @@
;; Copy of the above.
(define_mode_iterator DREG2 [DREG])
-;; Advanced SIMD modes for integer divides.
-(define_mode_iterator VQDIV [V4SI V2DI])
-
;; All modes suitable to store/load pair (2 elements) using STP/LDP.
(define_mode_iterator VP_2E [V2SI V2SF V2DI V2DF])
@@ -471,6 +468,10 @@
;; elements.
(define_mode_iterator SVE_FULL_HSDI [VNx8HI VNx4SI VNx2DI])
+;; Fully-packed SVE integer vector modes that have 16-bit, 32-bit or 64-bit
+;; elements and Advanced SIMD Fully-packed 64-bit elements.
+(define_mode_iterator SVE_FULL_HSDI_SIMD_DI [SVE_FULL_HSDI V2DI])
+
;; Fully-packed SVE integer vector modes that have 16-bit or 32-bit
;; elements.
(define_mode_iterator SVE_FULL_HSI [VNx8HI VNx4SI])
@@ -488,6 +489,10 @@
;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit elements.
(define_mode_iterator SVE_FULL_SDI [VNx4SI VNx2DI])
+;; Fully-packed SVE and Advanced SIMD integer vector modes that have 32-bit or
+;; 64-bit elements.
+(define_mode_iterator SVE_FULL_SDI_SIMD [SVE_FULL_SDI V4SI V2DI])
+
;; 2x and 4x tuples of the above, excluding 2x DI.
(define_mode_iterator SVE_FULL_SIx2_SDIx4 [VNx8SI VNx16SI VNx8DI])
@@ -550,6 +555,10 @@
VNx4SI VNx2SI
VNx2DI])
+;; All SVE integer vector modes and Advanced SIMD 64-bit vector
+;; element modes
+(define_mode_iterator SVE_I_SIMD_DI [SVE_I V2DI])
+
;; SVE integer vector modes whose elements are 16 bits or wider.
(define_mode_iterator SVE_HSDI [VNx8HI VNx4HI VNx2HI
VNx4SI VNx2SI
@@ -2268,7 +2277,8 @@
(VNx32HI "VNx8BI") (VNx32HF "VNx8BI")
(VNx32BF "VNx8BI")
(VNx16SI "VNx4BI") (VNx16SF "VNx4BI")
- (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")])
+ (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")
+ (V4SI "VNx4BI") (V2DI "VNx2BI")])
;; ...and again in lower case.
(define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi")
@@ -2370,6 +2380,7 @@
;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index.
(define_mode_attr sve_lane_con [(VNx8HI "y") (VNx4SI "y") (VNx2DI "x")
+ (V2DI "x")
(VNx8HF "y") (VNx4SF "y") (VNx2DF "x")])
;; The constraint to use for an SVE FCMLA lane index.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c
new file mode 100644
index 0000000..5b37ddd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O -mtune=a64fx" } */
+
+typedef unsigned long long __attribute__((__vector_size__ (16))) V;
+typedef unsigned long long __attribute__((__vector_size__ (32))) W;
+
+extern void bar (V v);
+
+void foo (V v, W w)
+{
+ bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) / v));
+}
+
+/* { dg-final { scan-assembler {udiv\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c
new file mode 100644
index 0000000..6d39dc8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O -mcpu=a64fx" } */
+
+typedef unsigned long long __attribute__((__vector_size__ (16))) V;
+typedef unsigned long long __attribute__((__vector_size__ (32))) W;
+
+extern void bar (V v);
+
+void foom (V v, W w)
+{
+ bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v));
+}
+
+/* { dg-final { scan-assembler {mul\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c
new file mode 100644
index 0000000..2bea18a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O -mtune=a64fx" } */
+
+typedef unsigned long long __attribute__((__vector_size__ (16))) V;
+typedef unsigned long long __attribute__((__vector_size__ (32))) W;
+
+extern void bar (V v);
+
+void foom (V v, W w)
+{
+ bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v));
+}
+
+/* { dg-final { scan-assembler {mul\tz[0-9]+.d, z[0-9]+.d, z[0-9]+.d} } } */