aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorTamar Christina <tamar.christina@arm.com>2021-02-01 13:50:43 +0000
committerTamar Christina <tamar.christina@arm.com>2021-02-01 13:50:43 +0000
commit0a3eccb6ef9351cf8668eede8060dd7481794cd2 (patch)
tree3097d45da393854ac6d9d5f4eb7cfe7d67d8c7c6 /gcc
parent1b303ef6cc8a5913345cbcd91abf13075ab2aec9 (diff)
downloadgcc-0a3eccb6ef9351cf8668eede8060dd7481794cd2.zip
gcc-0a3eccb6ef9351cf8668eede8060dd7481794cd2.tar.gz
gcc-0a3eccb6ef9351cf8668eede8060dd7481794cd2.tar.bz2
AArch64: Change canonicalization of smlal and smlsl in order to be able to optimize the vec_dup
g:87301e3956d44ad45e384a8eb16c79029d20213a and g:ee4c4fe289e768d3c6b6651c8bfa3fdf458934f4 changed the intrinsics to be proper RTL but accidentally ended up creating a regression because of the ordering in the RTL pattern. The existing RTL that combine should try to match to remove the vec_dup is aarch64_vec_<su>mlal_lane<Qlane> and aarch64_vec_<su>mult_lane<Qlane> which expects the select register to be the second operand of mult. The pattern introduced has it as the first operand so combine was unable to remove the vec_dup. This flips the order such that the patterns optimize correctly. gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_<su>mlal_n<mode>, aarch64_<su>mlsl<mode>, aarch64_<su>mlsl_n<mode>): Flip mult operands. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/smlal-smlsl-mull-optimized.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/aarch64/aarch64-simd.md18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/smlal-smlsl-mull-optimized.c45
2 files changed, 54 insertions, 9 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index bca2d8a..d185866 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1950,10 +1950,10 @@
(plus:<VWIDE>
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
- (vec_duplicate:VD_HSI
- (match_operand:<VEL> 3 "register_operand" "<h_con>")))
+ (match_operand:VD_HSI 2 "register_operand" "w"))
(ANY_EXTEND:<VWIDE>
- (match_operand:VD_HSI 2 "register_operand" "w")))
+ (vec_duplicate:VD_HSI
+ (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
"<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
@@ -1980,10 +1980,10 @@
(match_operand:<VWIDE> 1 "register_operand" "0")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
- (vec_duplicate:VD_HSI
- (match_operand:<VEL> 3 "register_operand" "<h_con>")))
+ (match_operand:VD_HSI 2 "register_operand" "w"))
(ANY_EXTEND:<VWIDE>
- (match_operand:VD_HSI 2 "register_operand" "w")))))]
+ (vec_duplicate:VD_HSI
+ (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
"TARGET_SIMD"
"<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_mla_<Vetype>_long")]
@@ -2078,10 +2078,10 @@
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
- (vec_duplicate:<VCOND>
- (match_operand:<VEL> 2 "register_operand" "<h_con>")))
+ (match_operand:VD_HSI 1 "register_operand" "w"))
(ANY_EXTEND:<VWIDE>
- (match_operand:VD_HSI 1 "register_operand" "w"))))]
+ (vec_duplicate:<VCOND>
+ (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
"TARGET_SIMD"
"<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
[(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/smlal-smlsl-mull-optimized.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/smlal-smlsl-mull-optimized.c
new file mode 100644
index 0000000..1e963e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/smlal-smlsl-mull-optimized.c
@@ -0,0 +1,45 @@
+/* { dg-do compile { target aarch64-*-* } } */
+
+#include <arm_neon.h>
+
+/*
+**add:
+** smlal v0.4s, v1.4h, v2.h[3]
+** ret
+*/
+
+int32x4_t add(int32x4_t acc, int16x4_t b, int16x4_t c) {
+ return vmlal_n_s16(acc, b, c[3]);
+}
+
+/*
+**sub:
+** smlsl v0.4s, v1.4h, v2.h[3]
+** ret
+*/
+
+int32x4_t sub(int32x4_t acc, int16x4_t b, int16x4_t c) {
+ return vmlsl_n_s16(acc, b, c[3]);
+}
+
+/*
+**smull:
+** smull v0.4s, v1.4h, v2.h[3]
+** ret
+*/
+
+int32x4_t smull(int16x4_t b, int16x4_t c) {
+ return vmull_n_s16(b, c[3]);
+}
+
+/*
+**umull:
+** umull v0.4s, v1.4h, v2.h[3]
+** ret
+*/
+
+uint32x4_t umull(uint16x4_t b, uint16x4_t c) {
+ return vmull_n_u16(b, c[3]);
+}
+
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" {-O[^0]} } } */