diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2021-01-20 18:11:20 +0000 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2021-01-20 19:29:42 +0000 |
commit | e140f5fd3e235c5a37dc99b79f37a5ad4dc59064 (patch) | |
tree | af6c82b56028d4c97ed7e3d44f0a1cf997f41d59 /gcc | |
parent | f8c677776617ab91826af1d950b00d853eaff622 (diff) | |
download | gcc-e140f5fd3e235c5a37dc99b79f37a5ad4dc59064.zip gcc-e140f5fd3e235c5a37dc99b79f37a5ad4dc59064.tar.gz gcc-e140f5fd3e235c5a37dc99b79f37a5ad4dc59064.tar.bz2 |
aarch64: Split vec_selects of bottom elements into simple move
In certain intrinsics use cases GCC leaves SETs of a bottom-element vec
select lying around:
(vec_select:DI (reg:V2DI 34 v2 [orig:128 __o ] [128])
(parallel [
(const_int 0 [0])
])))
This can be treated as a simple move in aarch64 when done between SIMD
registers for all normal widths.
These go through the aarch64_get_lane pattern.
This patch adds a splitter there to simplify these extracts to a move
that can, perhaps, be optimised away.
Another benefit is if the destination is memory we can use a simpler STR
instruction rather than ST1-lane.
gcc/
* config/aarch64/aarch64-simd.md (aarch64_get_lane<mode>):
Convert to define_insn_and_split. Split into simple move when moving
bottom element.
gcc/testsuite/
* gcc.target/aarch64/vdup_lane_2.c: Scan for fmov rather than
dup.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 10 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c | 2 |
2 files changed, 10 insertions, 2 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 41071b6..d7acd72 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -3312,7 +3312,9 @@ ;; Lane extraction of a value, neither sign nor zero extension ;; is guaranteed so upper bits should be considered undefined. ;; RTL uses GCC vector extension indices throughout so flip only for assembly. -(define_insn "aarch64_get_lane<mode>" +;; Extracting lane zero is split into a simple move when it is between SIMD +;; registers or a store. +(define_insn_and_split "aarch64_get_lane<mode>" [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv") (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w, w, w") @@ -3332,6 +3334,12 @@ gcc_unreachable (); } } + "&& reload_completed + && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0" + [(set (match_dup 0) (match_dup 1))] + { + operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode); + } [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")] ) diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c index a49db3e..16f4808 100644 --- a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c +++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c @@ -333,7 +333,7 @@ main () /* Asm check for vdups_lane_f32, vdups_lane_s32, vdups_lane_u32. */ /* Can't generate "dup s<n>, v<m>[0]" for vdups_lane_s32 and vdups_lane_u32. */ -/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 1} } */ +/* { dg-final { scan-assembler-times {fmov\ts0, s1} 1 } } */ /* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */ /* Asm check for vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64. */ |