diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2023-08-24 10:18:05 +0100 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2023-08-24 10:18:05 +0100 |
commit | aa81e80a5ae663f169496c580ba30ae281c83940 (patch) | |
tree | 9b640bf0d7ca569ccc8f6b0ae491fedbc0577b69 /gcc | |
parent | a28d4fce8ec2540259a257149de7081f27fb027e (diff) | |
download | gcc-aa81e80a5ae663f169496c580ba30ae281c83940.zip gcc-aa81e80a5ae663f169496c580ba30ae281c83940.tar.gz gcc-aa81e80a5ae663f169496c580ba30ae281c83940.tar.bz2 |
aarch64: Account for different Advanced SIMD fusing options
The scalar FNMADD/FNMSUB and SVE FNMLA/FNMLS instructions mean
that either side of a subtraction can start an accumulator chain.
However, Advanced SIMD doesn't have an equivalent instruction.
This means that, for Advanced SIMD, a subtraction can only be
fused if the second operand is a multiplication.
Also, if both sides of a subtraction are multiplications,
and if the second operand is used multiple times, such as:
c * d - a * b
e * f - a * b
then the first rather than second multiplication operand will tend
to be fused. On Advanced SIMD, this leads to:
tmp1 = a * b
tmp2 = -tmp1
... = tmp2 + c * d // FMLA
... = tmp2 + e * f // FMLA
where one of the FMLAs also requires a MOV, because FMLA overwrites
its accumulator register and tmp2 is needed as the accumulator for both.
This patch tries to account for this in the vector cost model.
It improves roms performance by 2-3% on Neoverse V1. It's also
needed to avoid a regression in fotonik for Neoverse N2 and
Neoverse V2 with the patch for PR110625.
gcc/
* config/aarch64/aarch64.cc: Include ssa.h.
(aarch64_multiply_add_p): Require the second operand of an
Advanced SIMD subtraction to be a multiplication. Assume that
such an operation won't be fused if the second operand is used
multiple times and if the first operand is also a multiplication.
gcc/testsuite/
* gcc.target/aarch64/neoverse_v1_2.c: New test.
* gcc.target/aarch64/neoverse_v1_3.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64.cc | 24 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/neoverse_v1_2.c | 15 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/neoverse_v1_3.c | 14 |
3 files changed, 47 insertions, 6 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 0346281..37d4140 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -84,6 +84,7 @@ #include "aarch64-feature-deps.h" #include "config/arm/aarch-common.h" #include "config/arm/aarch-common-protos.h" +#include "ssa.h" /* This file should be included last. */ #include "target-def.h" @@ -16411,20 +16412,20 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info, if (code != PLUS_EXPR && code != MINUS_EXPR) return false; - for (int i = 1; i < 3; ++i) + auto is_mul_result = [&](int i) { tree rhs = gimple_op (assign, i); /* ??? Should we try to check for a single use as well? */ if (TREE_CODE (rhs) != SSA_NAME) - continue; + return false; stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); if (!def_stmt_info || STMT_VINFO_DEF_TYPE (def_stmt_info) != vect_internal_def) - continue; + return false; gassign *rhs_assign = dyn_cast<gassign *> (def_stmt_info->stmt); if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR) - continue; + return false; if (vec_flags & VEC_ADVSIMD) { @@ -16444,8 +16445,19 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info, } return true; - } - return false; + }; + + if (code == MINUS_EXPR && (vec_flags & VEC_ADVSIMD)) + /* Advanced SIMD doesn't have FNMADD/FNMSUB/FNMLA/FNMLS, so the + multiplication must be on the second operand (to form an FMLS). + But if both operands are multiplications and the second operand + is used more than once, we'll instead negate the second operand + and use it as an accumulator for the first operand. 
*/ + return (is_mul_result (2) + && (has_single_use (gimple_assign_rhs2 (assign)) + || !is_mul_result (1))); + + return is_mul_result (1) || is_mul_result (2); } /* Return true if STMT_INFO is the second part of a two-statement boolean AND diff --git a/gcc/testsuite/gcc.target/aarch64/neoverse_v1_2.c b/gcc/testsuite/gcc.target/aarch64/neoverse_v1_2.c new file mode 100644 index 0000000..45d7e81 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/neoverse_v1_2.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2 -mcpu=neoverse-v1 --param aarch64-autovec-preference=1 -fdump-tree-vect-details" } */ + +void +f (float x[restrict][100], float y[restrict][100]) +{ + for (int i = 0; i < 100; ++i) + { + x[0][i] = y[0][i] * y[1][i] - y[3][i] * y[4][i]; + x[1][i] = y[1][i] * y[2][i] - y[3][i] * y[4][i]; + } +} + +/* { dg-final { scan-tree-dump {_[0-9]+ - _[0-9]+ 1 times vector_stmt costs 2 } "vect" } } */ +/* { dg-final { scan-tree-dump-not {vector_stmt costs 0 } "vect" } } */ +/* { dg-final { scan-tree-dump {_[0-9]+ - _[0-9]+ 1 times scalar_stmt costs 0 } "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/neoverse_v1_3.c b/gcc/testsuite/gcc.target/aarch64/neoverse_v1_3.c new file mode 100644 index 0000000..de31fc1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/neoverse_v1_3.c @@ -0,0 +1,14 @@ +/* { dg-options "-O2 -mcpu=neoverse-v1 --param aarch64-autovec-preference=2 -fdump-tree-vect-details" } */ + +void +f (float x[restrict][100], float y[restrict][100]) +{ + for (int i = 0; i < 100; ++i) + { + x[0][i] = y[0][i] * y[1][i] - y[3][i] * y[4][i]; + x[1][i] = y[1][i] * y[2][i] - y[3][i] * y[4][i]; + } +} + +/* { dg-final { scan-tree-dump {_[0-9]+ - _[0-9]+ 1 times vector_stmt costs 0 } "vect" } } */ +/* { dg-final { scan-tree-dump {_[0-9]+ - _[0-9]+ 1 times scalar_stmt costs 0 } "vect" } } */ |