aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>2023-05-04 09:42:37 +0100
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>2023-05-04 09:43:17 +0100
commit93c26deab98fc80b616a1c53c324a88f61036f53 (patch)
tree0465b097cafe1a4bce8e0fac662bcdbd0828a2ac
parentd840bc5cab39aa3dd8222d72b2cd40942bf91c93 (diff)
downloadgcc-93c26deab98fc80b616a1c53c324a88f61036f53.zip
gcc-93c26deab98fc80b616a1c53c324a88f61036f53.tar.gz
gcc-93c26deab98fc80b616a1c53c324a88f61036f53.tar.bz2
aarch64: PR target/99195 annotate simple ternary ops for vec-concat with zero
We're now moving onto various simple ternary instructions, including some lane forms. These include intrinsics that map down to mla, mls, fma, aba, bsl instructions. Tests are added for lane 0 and lane 1 as for some of these instructions the lane 0 variants use separate simpler patterns that need a separate annotation. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ChangeLog: PR target/99195 * config/aarch64/aarch64-simd.md (aarch64_<su>aba<mode>): Rename to... (aarch64_<su>aba<mode><vczle><vczbe>): ... This. (aarch64_mla<mode>): Rename to... (aarch64_mla<mode><vczle><vczbe>): ... This. (*aarch64_mla_elt<mode>): Rename to... (*aarch64_mla_elt<mode><vczle><vczbe>): ... This. (*aarch64_mla_elt_<vswap_width_name><mode>): Rename to... (*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>): ... This. (aarch64_mla_n<mode>): Rename to... (aarch64_mla_n<mode><vczle><vczbe>): ... This. (aarch64_mls<mode>): Rename to... (aarch64_mls<mode><vczle><vczbe>): ... This. (*aarch64_mls_elt<mode>): Rename to... (*aarch64_mls_elt<mode><vczle><vczbe>): ... This. (*aarch64_mls_elt_<vswap_width_name><mode>): Rename to... (*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>): ... This. (aarch64_mls_n<mode>): Rename to... (aarch64_mls_n<mode><vczle><vczbe>): ... This. (fma<mode>4): Rename to... (fma<mode>4<vczle><vczbe>): ... This. (*aarch64_fma4_elt<mode>): Rename to... (*aarch64_fma4_elt<mode><vczle><vczbe>): ... This. (*aarch64_fma4_elt_<vswap_width_name><mode>): Rename to... (*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>): ... This. (*aarch64_fma4_elt_from_dup<mode>): Rename to... (*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>): ... This. (fnma<mode>4): Rename to... (fnma<mode>4<vczle><vczbe>): ... This. (*aarch64_fnma4_elt<mode>): Rename to... (*aarch64_fnma4_elt<mode><vczle><vczbe>): ... This. (*aarch64_fnma4_elt_<vswap_width_name><mode>): Rename to... (*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>): ... This. (*aarch64_fnma4_elt_from_dup<mode>): Rename to... (*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>): ... This. (aarch64_simd_bsl<mode>_internal): Rename to... (aarch64_simd_bsl<mode>_internal<vczle><vczbe>): ... This. (*aarch64_simd_bsl<mode>_alt): Rename to... (*aarch64_simd_bsl<mode>_alt<vczle><vczbe>): ... This. gcc/testsuite/ChangeLog: PR target/99195 * gcc.target/aarch64/simd/pr99195_3.c: New test.
-rw-r--r--gcc/config/aarch64/aarch64-simd.md38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/pr99195_3.c68
2 files changed, 87 insertions, 19 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 511d1e7..705c4b0 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1097,7 +1097,7 @@
}
)
-(define_insn "aarch64_<su>aba<mode>"
+(define_insn "aarch64_<su>aba<mode><vczle><vczbe>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(plus:VDQ_BHSI (minus:VDQ_BHSI
(USMAX:VDQ_BHSI
@@ -1551,7 +1551,7 @@
)
-(define_insn "aarch64_mla<mode>"
+(define_insn "aarch64_mla<mode><vczle><vczbe>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(plus:VDQ_BHSI (mult:VDQ_BHSI
(match_operand:VDQ_BHSI 2 "register_operand" "w")
@@ -1562,7 +1562,7 @@
[(set_attr "type" "neon_mla_<Vetype><q>")]
)
-(define_insn "*aarch64_mla_elt<mode>"
+(define_insn "*aarch64_mla_elt<mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(plus:VDQHS
(mult:VDQHS
@@ -1580,7 +1580,7 @@
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
-(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
+(define_insn "*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(plus:VDQHS
(mult:VDQHS
@@ -1598,7 +1598,7 @@
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
-(define_insn "aarch64_mla_n<mode>"
+(define_insn "aarch64_mla_n<mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(plus:VDQHS
(mult:VDQHS
@@ -1611,7 +1611,7 @@
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
-(define_insn "aarch64_mls<mode>"
+(define_insn "aarch64_mls<mode><vczle><vczbe>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
(mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
@@ -1621,7 +1621,7 @@
[(set_attr "type" "neon_mla_<Vetype><q>")]
)
-(define_insn "*aarch64_mls_elt<mode>"
+(define_insn "*aarch64_mls_elt<mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(minus:VDQHS
(match_operand:VDQHS 4 "register_operand" "0")
@@ -1639,7 +1639,7 @@
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
-(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
+(define_insn "*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(minus:VDQHS
(match_operand:VDQHS 4 "register_operand" "0")
@@ -1657,7 +1657,7 @@
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
-(define_insn "aarch64_mls_n<mode>"
+(define_insn "aarch64_mls_n<mode><vczle><vczbe>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(minus:VDQHS
(match_operand:VDQHS 1 "register_operand" "0")
@@ -3077,7 +3077,7 @@
}
)
-(define_insn "fma<mode>4"
+(define_insn "fma<mode>4<vczle><vczbe>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")
@@ -3087,7 +3087,7 @@
[(set_attr "type" "neon_fp_mla_<stype><q>")]
)
-(define_insn "*aarch64_fma4_elt<mode>"
+(define_insn "*aarch64_fma4_elt<mode><vczle><vczbe>"
[(set (match_operand:VDQF 0 "register_operand" "=w")
(fma:VDQF
(vec_duplicate:VDQF
@@ -3104,7 +3104,7 @@
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
-(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
+(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(fma:VDQSF
(vec_duplicate:VDQSF
@@ -3121,7 +3121,7 @@
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
-(define_insn "*aarch64_fma4_elt_from_dup<mode>"
+(define_insn "*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(fma:VMUL
(vec_duplicate:VMUL
@@ -3149,7 +3149,7 @@
[(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
-(define_insn "fnma<mode>4"
+(define_insn "fnma<mode>4<vczle><vczbe>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(fma:VHSDF
(neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
@@ -3160,7 +3160,7 @@
[(set_attr "type" "neon_fp_mla_<stype><q>")]
)
-(define_insn "*aarch64_fnma4_elt<mode>"
+(define_insn "*aarch64_fnma4_elt<mode><vczle><vczbe>"
[(set (match_operand:VDQF 0 "register_operand" "=w")
(fma:VDQF
(neg:VDQF
@@ -3178,7 +3178,7 @@
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
-(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
+(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(fma:VDQSF
(neg:VDQSF
@@ -3196,7 +3196,7 @@
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
-(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
+(define_insn "*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(fma:VMUL
(neg:VMUL
@@ -3808,7 +3808,7 @@
;; Some forms of straight-line code may generate the equivalent form
;; in *aarch64_simd_bsl<mode>_alt.
-(define_insn "aarch64_simd_bsl<mode>_internal"
+(define_insn "aarch64_simd_bsl<mode>_internal<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
(xor:VDQ_I
(and:VDQ_I
@@ -3832,7 +3832,7 @@
;; the first. The two are equivalent but since recog doesn't try all
;; permutations of commutative operations, we have to have a separate pattern.
-(define_insn "*aarch64_simd_bsl<mode>_alt"
+(define_insn "*aarch64_simd_bsl<mode>_alt<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
(xor:VDQ_I
(and:VDQ_I
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_3.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_3.c
new file mode 100644
index 0000000..c751924
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_3.c
@@ -0,0 +1,68 @@
+/* PR target/99195. */
+/* Check that we take advantage of 64-bit Advanced SIMD operations clearing
+ the top half of the vector register and no explicit zeroing instructions
+ are emitted. */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define TERNARY(OT,IT,OP,S) \
+OT \
+foo_##OP##_##S (IT a, IT b, IT c) \
+{ \
+ IT zeros = vcreate_##S (0); \
+ return vcombine_##S (v##OP##_##S (a, b, c), zeros); \
+}
+
+#define FUNC(T,IS,OS,OP,S) TERNARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+#define OPTWO(T,IS,OS,S,OP1,OP2) \
+FUNC (T, IS, OS, OP1, S) \
+FUNC (T, IS, OS, OP2, S)
+
+#define OPTHREE(T, IS, OS, S, OP1, OP2, OP3) \
+FUNC (T, IS, OS, OP1, S) \
+OPTWO (T, IS, OS, S, OP2, OP3)
+
+#define OPFOUR(T,IS,OS,S,OP1,OP2,OP3,OP4) \
+FUNC (T, IS, OS, OP1, S) \
+OPTHREE (T, IS, OS, S, OP2, OP3, OP4)
+
+OPTHREE (int8, 8, 16, s8, mla, mls, aba)
+OPTHREE (int16, 4, 8, s16, mla, mls, aba)
+OPTHREE (int32, 2, 4, s32, mla, mls, aba)
+
+OPFOUR (uint8, 8, 16, u8, mla, mls, aba, bsl)
+OPFOUR (uint16, 4, 8, u16, mla, mls, aba, bsl)
+OPFOUR (uint32, 2, 4, u32, mla, mls, aba, bsl)
+
+OPTHREE (float32, 2, 4, f32, mla, fma, fms)
+
+#undef FUNC
+#define TERNARY_LANE(OT,IT,OP,S) \
+OT \
+foo_##OP##_##S (IT a, IT b, IT c) \
+{ \
+ IT zeros = vcreate_##S (0); \
+ return vcombine_##S (v##OP##_##S (a, b, c, 0), zeros); \
+} \
+OT \
+foo_##OP##_##S##_lane1 (IT a, IT b, IT c) \
+{ \
+ IT zeros = vcreate_##S (0); \
+ return vcombine_##S (v##OP##_##S (a, b, c, 1), zeros); \
+}
+
+#define FUNC(T,IS,OS,OP,S) TERNARY_LANE (T##x##OS##_t, T##x##IS##_t, OP, S)
+OPTWO (int16, 4, 8, s16, mla_lane, mls_lane)
+OPTWO (int32, 2, 4, s32, mla_lane, mls_lane)
+
+OPTWO (uint16, 4, 8, u16, mla_lane, mls_lane)
+OPTWO (uint32, 2, 4, u32, mla_lane, mls_lane)
+
+OPTHREE (float32, 2, 4, f32, mla_lane, fma_lane, fms_lane)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+