author    Kyrylo Tkachov <kyrylo.tkachov@arm.com>  2023-05-10 10:40:06 +0100
committer Kyrylo Tkachov <kyrylo.tkachov@arm.com>  2023-05-10 10:42:22 +0100
commit    d1e7f9993084b87e6676a5ccef3c8b7f807a6013 (patch)
tree      d23b8af6329899110564f65cae7e610bcc05c6b0
parent    39d6d4256d16d676f8b9031c4d1d115ddf4ad76b (diff)
aarch64: PR target/99195 annotate simple narrowing patterns for vec-concat-zero
This patch cleans up some almost-duplicate patterns for the XTN, SQXTN,
UQXTN instructions.  Using the <vczle><vczbe> attributes we can remove
the BYTES_BIG_ENDIAN and !BYTES_BIG_ENDIAN cases, as well as the
intrinsic expanders that select between the two.  Tests are also added.
Thankfully the diffstat comes out negative \o/.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

	PR target/99195
	* config/aarch64/aarch64-simd.md (aarch64_xtn<mode>_insn_le): Delete.
	(aarch64_xtn<mode>_insn_be): Likewise.
	(trunc<mode><Vnarrowq>2): Rename to...
	(trunc<mode><Vnarrowq>2<vczle><vczbe>): ... This.
	(aarch64_xtn<mode>): Move under the above.  Just emit the truncate RTL.
	(aarch64_<su>qmovn<mode>): Likewise.
	(aarch64_<su>qmovn<mode><vczle><vczbe>): New define_insn.
	(aarch64_<su>qmovn<mode>_insn_le): Delete.
	(aarch64_<su>qmovn<mode>_insn_be): Likewise.

gcc/testsuite/ChangeLog:

	PR target/99195
	* gcc.target/aarch64/simd/pr99195_4.c: Add tests for vmovn, vqmovn.
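As a minimal sketch of the code shape this affects (an illustrative
example in the style of the new tests, not part of the patch itself;
the function name is made up):

	#include <arm_neon.h>

	/* With the <vczle>/<vczbe> annotations the vec_concat-with-zero is
	   matched by the annotated pattern directly, so this is expected to
	   compile to a single XTN with no stray fmov/mov.  */
	uint8x16_t
	narrow_zero_high (uint16x8_t a)
	{
	  uint8x8_t zeros = vcreate_u8 (0);
	  return vcombine_u8 (vmovn_u16 (a), zeros);
	}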
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md                 99
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c  22
2 files changed, 37 insertions, 84 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 4603888..9ad0489 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1743,47 +1743,6 @@
;; Narrowing operations.
-(define_insn "aarch64_xtn<mode>_insn_le"
- [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
- (vec_concat:<VNARROWQ2>
- (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
- (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
- "TARGET_SIMD && !BYTES_BIG_ENDIAN"
- "xtn\\t%0.<Vntype>, %1.<Vtype>"
- [(set_attr "type" "neon_move_narrow_q")]
-)
-
-(define_insn "aarch64_xtn<mode>_insn_be"
- [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
- (vec_concat:<VNARROWQ2>
- (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
- (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN"
- "xtn\\t%0.<Vntype>, %1.<Vtype>"
- [(set_attr "type" "neon_move_narrow_q")]
-)
-
-(define_expand "aarch64_xtn<mode>"
- [(set (match_operand:<VNARROWQ> 0 "register_operand")
- (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
- "TARGET_SIMD"
- {
- rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
- if (BYTES_BIG_ENDIAN)
- emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
- CONST0_RTX (<VNARROWQ>mode)));
- else
- emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
- CONST0_RTX (<VNARROWQ>mode)));
-
- /* The intrinsic expects a narrow result, so emit a subreg that will get
- optimized away as appropriate. */
- emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
- <VNARROWQ2>mode));
- DONE;
- }
-)
-
(define_insn "aarch64_xtn2<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
@@ -5300,7 +5259,7 @@
;; sqmovn and uqmovn
-(define_insn "aarch64_<su>qmovn<mode>"
+(define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
(SAT_TRUNC:<VNARROWQ>
(match_operand:SD_HSDI 1 "register_operand" "w")))]
@@ -5309,48 +5268,13 @@
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
-(define_insn "aarch64_<su>qmovn<mode>_insn_le"
- [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
- (vec_concat:<VNARROWQ2>
- (SAT_TRUNC:<VNARROWQ>
- (match_operand:VQN 1 "register_operand" "w"))
- (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
- "TARGET_SIMD && !BYTES_BIG_ENDIAN"
- "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
- [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_<su>qmovn<mode>_insn_be"
- [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
- (vec_concat:<VNARROWQ2>
- (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
- (SAT_TRUNC:<VNARROWQ>
- (match_operand:VQN 1 "register_operand" "w"))))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN"
- "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
- [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
-)
-
-(define_expand "aarch64_<su>qmovn<mode>"
- [(set (match_operand:<VNARROWQ> 0 "register_operand")
+(define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
(SAT_TRUNC:<VNARROWQ>
- (match_operand:VQN 1 "register_operand")))]
+ (match_operand:VQN 1 "register_operand" "w")))]
"TARGET_SIMD"
- {
- rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
- if (BYTES_BIG_ENDIAN)
- emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1],
- CONST0_RTX (<VNARROWQ>mode)));
- else
- emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1],
- CONST0_RTX (<VNARROWQ>mode)));
-
- /* The intrinsic expects a narrow result, so emit a subreg that will get
- optimized away as appropriate. */
- emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
- <VNARROWQ2>mode));
- DONE;
- }
+ "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
+ [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
(define_insn "aarch64_<su>qxtn2<mode>_le"
@@ -9281,7 +9205,7 @@
)
;; Truncate a 128-bit integer vector to a 64-bit vector.
-(define_insn "trunc<mode><Vnarrowq>2"
+(define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
"TARGET_SIMD"
@@ -9289,6 +9213,15 @@
[(set_attr "type" "neon_move_narrow_q")]
)
+;; Expander for the intrinsics that only takes one mode unlike the two-mode
+;; trunc optab.
+(define_expand "aarch64_xtn<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand")
+ (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
+ "TARGET_SIMD"
+ {}
+)
+
(define_insn "aarch64_bfdot<mode>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(plus:VDQSF
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
index b6ef15b..6127cb2 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
@@ -15,7 +15,6 @@ foo_##OP##_##OS (IT a, IT b) \
return vcombine_##OS (v##OP##_##IS (a, b), zeros); \
}
-
#define FUNC(OT,IT,IMT,IS,OS) \
MYOP (OT, IT, IMT, addhn, IS, OS) \
MYOP (OT, IT, IMT, subhn, IS, OS) \
@@ -30,6 +29,27 @@ FUNC (uint8x16_t, uint16x8_t, uint8x8_t, u16, u8)
FUNC (uint16x8_t, uint32x4_t, uint16x4_t, u32, u16)
FUNC (uint32x4_t, uint64x2_t, uint32x2_t, u64, u32)
+#undef MYOP
+#define MYOP(OT,IT,IMT,OP,IS,OS) \
+OT \
+foo_##OP##_##OS (IT a) \
+{ \
+ IMT zeros = vcreate_##OS (0); \
+ return vcombine_##OS (v##OP##_##IS (a), zeros); \
+}
+
+#undef FUNC
+#define FUNC(OP) \
+MYOP (int8x16_t, int16x8_t, int8x8_t, OP, s16, s8) \
+MYOP (int16x8_t, int32x4_t, int16x4_t, OP, s32, s16) \
+MYOP (int32x4_t, int64x2_t, int32x2_t, OP, s64, s32) \
+MYOP (uint8x16_t, uint16x8_t, uint8x8_t, OP, u16, u8) \
+MYOP (uint16x8_t, uint32x4_t, uint16x4_t, OP, u32, u16) \
+MYOP (uint32x4_t, uint64x2_t, uint32x2_t, OP, u64, u32) \
+
+FUNC (movn)
+FUNC (qmovn)
+
/* { dg-final { scan-assembler-not {\tfmov\t} } } */
/* { dg-final { scan-assembler-not {\tmov\t} } } */
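As a follow-up illustration (hypothetical, mirroring the vqmovn tests
added above), the saturating narrows benefit in the same way:

	#include <arm_neon.h>

	/* Expected to become a single SQXTN now that the annotated
	   aarch64_<su>qmovn pattern matches the concat-with-zero.  */
	int16x8_t
	sat_narrow_zero_high (int32x4_t a)
	{
	  int16x4_t zeros = vcreate_s16 (0);
	  return vcombine_s16 (vqmovn_s32 (a), zeros);
	}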