author     Tamar Christina <tamar.christina@arm.com>    2022-04-07 08:27:53 +0100
committer  Tamar Christina <tamar.christina@arm.com>    2022-04-07 08:27:53 +0100
commit     024edf08959e9c1d5022901e6c4e5cbaa5b6c8d5 (patch)
tree       17513f01d1c11672741ff1243864b6ceacf259f6 /gcc/config/aarch64
parent     fdd81afcf18d1a926d81d63cc4525fc9442aa9a5 (diff)
AArch64: Fix left fold sum reduction RTL patterns [PR104049]
As the discussion in the PR pointed out, the RTL we have for the REDUC_PLUS
patterns is wrong.  The UNSPECs are modelled as returning a vector, and an
expand pattern then emits a vec_select of the 0th element to get the scalar.
This is incorrect: the instruction itself already returns a single scalar, and
declaring that it returns a vector allows combine to push a subreg into the
pattern, which causes reload to emit duplicate moves.

This patch corrects this by removing the weird indirection and making the RTL
pattern model the correct semantics of the instruction directly.
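
For context, the kind of source that hits these patterns is an ordinary sum
reduction like the sketch below.  This is illustrative only: the function name
and shape are mine, not the gcc.target/aarch64/vadd_reduc-*.c tests added by
the patch.  At -O3 the loop is normally vectorized and the final reduction is
lowered through reduc_plus_scal_<mode> to an across-lanes ADDV, whose result is
already a scalar; under the old modelling that scalar was then extracted from a
vector-typed UNSPEC, which is what opened the door to the redundant moves
described above.

    #include <stdint.h>

    /* Illustrative sketch only -- not the vadd_reduc tests from this commit.
       With -O3 on AArch64 the loop is normally vectorized and the final sum
       comes from an across-lanes ADDV; the patch makes the RTL for that step
       produce the scalar result directly instead of a vector plus a lane
       extract.  */
    int32_t
    sum_array (const int32_t *x, int n)
    {
      int32_t s = 0;
      for (int i = 0; i < n; i++)
        s += x[i];
      return s;
    }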
gcc/ChangeLog:
PR target/104049
* config/aarch64/aarch64-simd.md
(aarch64_reduc_plus_internal<mode>): Fix RTL and rename to...
(reduc_plus_scal_<mode>): ... This.
(reduc_plus_scal_v4sf): Moved.
(aarch64_reduc_plus_internalv2si): Fix RTL and rename to...
(reduc_plus_scal_v2si): ... This.
gcc/testsuite/ChangeLog:
PR target/104049
* gcc.target/aarch64/vadd_reduc-1.c: New test.
* gcc.target/aarch64/vadd_reduc-2.c: New test.
Diffstat (limited to 'gcc/config/aarch64')
 -rw-r--r--  gcc/config/aarch64/aarch64-simd.md  70
 1 file changed, 28 insertions, 42 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1873342..a00e1c6 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3385,20 +3385,6 @@
 
 ;; 'across lanes' add.
 
-(define_expand "reduc_plus_scal_<mode>"
-  [(match_operand:<VEL> 0 "register_operand")
-   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
-		 UNSPEC_ADDV)]
-  "TARGET_SIMD"
-  {
-    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
-    rtx scratch = gen_reg_rtx (<MODE>mode);
-    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
-    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
-    DONE;
-  }
-)
-
 (define_insn "aarch64_faddp<mode>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
@@ -3409,31 +3395,22 @@
   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
 )
 
-(define_insn "aarch64_reduc_plus_internal<mode>"
- [(set (match_operand:VDQV 0 "register_operand" "=w")
-       (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
+(define_insn "reduc_plus_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+       (unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
 		    UNSPEC_ADDV))]
  "TARGET_SIMD"
  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
   [(set_attr "type" "neon_reduc_add<q>")]
 )
 
-(define_insn "aarch64_<su>addlv<mode>"
- [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
-       (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
-		    USADDLV))]
- "TARGET_SIMD"
- "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
-  [(set_attr "type" "neon_reduc_add<q>")]
-)
-
-(define_insn "aarch64_<su>addlp<mode>"
- [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
-       (unspec:<VDBLW> [(match_operand:VDQV_L 1 "register_operand" "w")]
-		    USADDLP))]
+(define_insn "reduc_plus_scal_v2si"
+ [(set (match_operand:SI 0 "register_operand" "=w")
+       (unspec:SI [(match_operand:V2SI 1 "register_operand" "w")]
+		    UNSPEC_ADDV))]
  "TARGET_SIMD"
- "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
- [(set_attr "type" "neon_reduc_add<q>")]
+ "addp\\t%0.2s, %1.2s, %1.2s"
+  [(set_attr "type" "neon_reduc_add")]
 )
 
 ;; ADDV with result zero-extended to SI/DImode (for popcount).
@@ -3447,15 +3424,6 @@
   [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
 )
 
-(define_insn "aarch64_reduc_plus_internalv2si"
- [(set (match_operand:V2SI 0 "register_operand" "=w")
-       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
-		    UNSPEC_ADDV))]
- "TARGET_SIMD"
- "addp\\t%0.2s, %1.2s, %1.2s"
-  [(set_attr "type" "neon_reduc_add")]
-)
-
 (define_insn "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
@@ -3467,7 +3435,7 @@
 
 (define_expand "reduc_plus_scal_v4sf"
   [(set (match_operand:SF 0 "register_operand")
-	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
+	(unspec:SF [(match_operand:V4SF 1 "register_operand")]
 		     UNSPEC_FADDV))]
   "TARGET_SIMD"
 {
@@ -3479,6 +3447,24 @@
   DONE;
 })
 
+(define_insn "aarch64_<su>addlv<mode>"
+ [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
+       (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
+		    USADDLV))]
+ "TARGET_SIMD"
+ "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
+  [(set_attr "type" "neon_reduc_add<q>")]
+)
+
+(define_insn "aarch64_<su>addlp<mode>"
+ [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
+       (unspec:<VDBLW> [(match_operand:VDQV_L 1 "register_operand" "w")]
+		    USADDLP))]
+ "TARGET_SIMD"
+ "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
+  [(set_attr "type" "neon_reduc_add<q>")]
+)
+
 (define_insn "clrsb<mode>2"
   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 	(clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
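
For reference, the same across-lanes additions are reachable directly through
the ACLE intrinsics; the sketch below is illustrative only and is not part of
the patch.  vaddvq_s32 maps to ADDV Sd, Vn.4S, and vaddv_s32, which has no
2-lane ADDV form, maps to ADDP Vd.2S, Vn.2S, Vn.2S -- in both cases the
instruction already yields a single scalar result, which is exactly what the
rewritten reduc_plus_scal_<mode> and reduc_plus_scal_v2si patterns now say in
their RTL.

    #include <arm_neon.h>
    #include <stdint.h>

    /* Illustrative sketch (not from the patch or its testsuite).  */

    int32_t
    sum_v4si (int32x4_t v)
    {
      return vaddvq_s32 (v);   /* addv  s0, v0.4s            */
    }

    int32_t
    sum_v2si (int32x2_t v)
    {
      return vaddv_s32 (v);    /* addp  v0.2s, v0.2s, v0.2s  */
    }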