diff options
author | Jonathan Wright <jonathan.wright@arm.com> | 2021-05-16 13:01:47 +0100 |
---|---|---|
committer | Jonathan Wright <jonathan.wright@arm.com> | 2021-05-19 14:45:17 +0100 |
commit | 577d5819e0cada818aca975752809d55ccecc6e8 (patch) | |
tree | b8b80ff13cf38f91bd09424932030b966db01c11 | |
parent | a680be25aa3da89c2d94dba3f76e1e1d2d81e756 (diff) | |
download | gcc-577d5819e0cada818aca975752809d55ccecc6e8.zip gcc-577d5819e0cada818aca975752809d55ccecc6e8.tar.gz gcc-577d5819e0cada818aca975752809d55ccecc6e8.tar.bz2 |
aarch64: Use an expander for quad-word vec_pack_trunc pattern
The existing vec_pack_trunc RTL pattern emits an opaque two-
instruction assembly code sequence that prevents proper instruction
scheduling. This commit changes the pattern to an expander that emits
individual xtn and xtn2 instructions.
This commit also consolidates the duplicate truncation patterns.
gcc/ChangeLog:
2021-05-17 Jonathan Wright <jonathan.wright@arm.com>
* config/aarch64/aarch64-simd.md (aarch64_simd_vec_pack_trunc_<mode>):
Remove as duplicate of...
(aarch64_xtn<mode>): This.
(aarch64_xtn2<mode>_le): Move position in file.
(aarch64_xtn2<mode>_be): Move position in file.
(aarch64_xtn2<mode>): Move position in file.
(vec_pack_trunc_<mode>): Define as an expander.
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 113 |
1 file changed, 56 insertions, 57 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index c67fa3f..447b557 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1691,14 +1691,51 @@ ;; Narrowing operations. ;; For doubles. -(define_insn "aarch64_simd_vec_pack_trunc_<mode>" - [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") - (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))] - "TARGET_SIMD" - "xtn\\t%0.<Vntype>, %1.<Vtype>" + +(define_insn "aarch64_xtn<mode>" + [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") + (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))] + "TARGET_SIMD" + "xtn\\t%0.<Vntype>, %1.<Vtype>" [(set_attr "type" "neon_shift_imm_narrow_q")] ) +(define_insn "aarch64_xtn2<mode>_le" + [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") + (vec_concat:<VNARROWQ2> + (match_operand:<VNARROWQ> 1 "register_operand" "0") + (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" + "xtn2\t%0.<V2ntype>, %2.<Vtype>" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_insn "aarch64_xtn2<mode>_be" + [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") + (vec_concat:<VNARROWQ2> + (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w")) + (match_operand:<VNARROWQ> 1 "register_operand" "0")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "xtn2\t%0.<V2ntype>, %2.<Vtype>" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_expand "aarch64_xtn2<mode>" + [(match_operand:<VNARROWQ2> 0 "register_operand") + (match_operand:<VNARROWQ> 1 "register_operand") + (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))] + "TARGET_SIMD" + { + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1], + operands[2])); + DONE; + } +) + (define_expand "vec_pack_trunc_<mode>" 
[(match_operand:<VNARROWD> 0 "register_operand") (match_operand:VDN 1 "register_operand") @@ -1711,7 +1748,7 @@ emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo])); emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi])); - emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg)); + emit_insn (gen_aarch64_xtn<Vdbl> (operands[0], tempreg)); DONE; }) @@ -1901,20 +1938,25 @@ ;; For quads. -(define_insn "vec_pack_trunc_<mode>" - [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w") +(define_expand "vec_pack_trunc_<mode>" + [(set (match_operand:<VNARROWQ2> 0 "register_operand") (vec_concat:<VNARROWQ2> - (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")) - (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] + (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")) + (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))] "TARGET_SIMD" { + rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode); + int lo = BYTES_BIG_ENDIAN ? 2 : 1; + int hi = BYTES_BIG_ENDIAN ? 1 : 2; + + emit_insn (gen_aarch64_xtn<mode> (tmpreg, operands[lo])); + if (BYTES_BIG_ENDIAN) - return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>"; + emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], tmpreg, operands[hi])); else - return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>"; + emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], tmpreg, operands[hi])); + DONE; } - [(set_attr "type" "multiple") - (set_attr "length" "8")] ) ;; Widening operations. @@ -8570,13 +8612,6 @@ "" ) -(define_expand "aarch64_xtn<mode>" - [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") - (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))] - "TARGET_SIMD" - "" -) - ;; Truncate a 128-bit integer vector to a 64-bit vector. 
(define_insn "trunc<mode><Vnarrowq>2" [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") @@ -8586,42 +8621,6 @@ [(set_attr "type" "neon_shift_imm_narrow_q")] ) -(define_insn "aarch64_xtn2<mode>_le" - [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") - (vec_concat:<VNARROWQ2> - (match_operand:<VNARROWQ> 1 "register_operand" "0") - (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] - "TARGET_SIMD && !BYTES_BIG_ENDIAN" - "xtn2\t%0.<V2ntype>, %2.<Vtype>" - [(set_attr "type" "neon_shift_imm_narrow_q")] -) - -(define_insn "aarch64_xtn2<mode>_be" - [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") - (vec_concat:<VNARROWQ2> - (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w")) - (match_operand:<VNARROWQ> 1 "register_operand" "0")))] - "TARGET_SIMD && BYTES_BIG_ENDIAN" - "xtn2\t%0.<V2ntype>, %2.<Vtype>" - [(set_attr "type" "neon_shift_imm_narrow_q")] -) - -(define_expand "aarch64_xtn2<mode>" - [(match_operand:<VNARROWQ2> 0 "register_operand") - (match_operand:<VNARROWQ> 1 "register_operand") - (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))] - "TARGET_SIMD" - { - if (BYTES_BIG_ENDIAN) - emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1], - operands[2])); - else - emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1], - operands[2])); - DONE; - } -) - (define_insn "aarch64_bfdot<mode>" [(set (match_operand:VDQSF 0 "register_operand" "=w") (plus:VDQSF |