diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2024-04-05 14:47:15 +0100 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2024-04-05 14:47:15 +0100 |
commit | 67cbb1c638d6ab3a9cb77e674541e2b291fb67df (patch) | |
tree | 20aec7a29ba46975301a814b856e5c5f3881bf41 | |
parent | e4d074321bcafa6504ee6b77754b4450a4622f9d (diff) | |
download | gcc-67cbb1c638d6ab3a9cb77e674541e2b291fb67df.zip gcc-67cbb1c638d6ab3a9cb77e674541e2b291fb67df.tar.gz gcc-67cbb1c638d6ab3a9cb77e674541e2b291fb67df.tar.bz2 |
aarch64: Fix bogus cnot optimisation [PR114603]
aarch64-sve.md had a pattern that combined:
cmpeq pb.T, pa/z, zc.T, #0
mov zd.T, pb/z, #1
into:
cnot zd.T, pa/m, zc.T
But this is only valid if pa.T is a ptrue. In other cases, the
original would set inactive elements of zd.T to 0, whereas the
combined form would copy elements from zc.T.
gcc/
PR target/114603
* config/aarch64/aarch64-sve.md (@aarch64_pred_cnot<mode>): Replace
with...
(@aarch64_ptrue_cnot<mode>): ...this, requiring operand 1 to be
a ptrue.
(*cnot<mode>): Require operand 1 to be a ptrue.
* config/aarch64/aarch64-sve-builtins-base.cc (svcnot_impl::expand):
Use aarch64_ptrue_cnot<mode> for _x operations that are predicated
with a ptrue. Represent other _x operations as fully-defined _m
operations.
gcc/testsuite/
PR target/114603
* gcc.target/aarch64/sve/acle/general/cnot_1.c: New test.
-rw-r--r-- | gcc/config/aarch64/aarch64-sve-builtins-base.cc | 25 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 22 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c | 23 |
3 files changed, 50 insertions, 20 deletions
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 257ca5b..5be2315 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -517,15 +517,22 @@ public: expand (function_expander &e) const override { machine_mode mode = e.vector_mode (0); - if (e.pred == PRED_x) - { - /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs - a ptrue hint. */ - e.add_ptrue_hint (0, e.gp_mode (0)); - return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode)); - } - - return e.use_cond_insn (code_for_cond_cnot (mode), 0); + machine_mode pred_mode = e.gp_mode (0); + /* The underlying _x pattern is effectively: + + dst = src == 0 ? 1 : 0 + + rather than an UNSPEC_PRED_X. Using this form allows autovec + constructs to be matched by combine, but it means that the + predicate on the src == 0 comparison must be all-true. + + For simplicity, represent other _x operations as fully-defined _m + operations rather than using a separate bespoke pattern. */ + if (e.pred == PRED_x + && gen_lowpart (pred_mode, e.args[0]) == CONSTM1_RTX (pred_mode)) + return e.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode)); + return e.use_cond_insn (code_for_cond_cnot (mode), + e.pred == PRED_x ? 1 : 0); } }; diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index eca8623..0434358 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3363,24 +3363,24 @@ ;; - CNOT ;; ------------------------------------------------------------------------- -;; Predicated logical inverse. -(define_expand "@aarch64_pred_cnot<mode>" +;; Logical inverse, predicated with a ptrue. +(define_expand "@aarch64_ptrue_cnot<mode>" [(set (match_operand:SVE_FULL_I 0 "register_operand") (unspec:SVE_FULL_I [(unspec:<VPRED> [(match_operand:<VPRED> 1 "register_operand") - (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (const_int SVE_KNOWN_PTRUE) (eq:<VPRED> - (match_operand:SVE_FULL_I 3 "register_operand") - (match_dup 4))] + (match_operand:SVE_FULL_I 2 "register_operand") + (match_dup 3))] UNSPEC_PRED_Z) - (match_dup 5) - (match_dup 4)] + (match_dup 4) + (match_dup 3)] UNSPEC_SEL))] "TARGET_SVE" { - operands[4] = CONST0_RTX (<MODE>mode); - operands[5] = CONST1_RTX (<MODE>mode); + operands[3] = CONST0_RTX (<MODE>mode); + operands[4] = CONST1_RTX (<MODE>mode); } ) @@ -3389,7 +3389,7 @@ (unspec:SVE_I [(unspec:<VPRED> [(match_operand:<VPRED> 1 "register_operand") - (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (const_int SVE_KNOWN_PTRUE) (eq:<VPRED> (match_operand:SVE_I 2 "register_operand") (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] @@ -11001,4 +11001,4 @@ GET_MODE (operands[2])); return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>"; } -)
\ No newline at end of file +) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c new file mode 100644 index 0000000..b1a489f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c @@ -0,0 +1,23 @@ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <arm_sve.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** foo: +** cmpeq (p[0-7])\.s, p0/z, z0\.s, #0 +** mov z0\.s, \1/z, #1 +** ret +*/ +svint32_t foo(svbool_t pg, svint32_t y) +{ + return svsel(svcmpeq(pg, y, 0), svdup_s32(1), svdup_s32(0)); +} + +#ifdef __cplusplus +} +#endif |