author     Richard Sandiford <richard.sandiford@arm.com>   2021-03-26 16:08:32 +0000
committer  Richard Sandiford <richard.sandiford@arm.com>   2021-03-26 16:08:32 +0000
commit     7c679969bac9b7ae5e9446bfaa5466e19063d690 (patch)
tree       667828ec4157edded0b696f774f07e9d2ad4a02c /gcc
parent     d1ff0847b2df6ad21f77e26e7e516643c5aa7d40 (diff)
aarch64: Add costs for one element of a scatter store
Currently, each element in a gather load is costed as a scalar_load
and each element in a scatter store is costed as a scalar_store.
The load side seems to work pretty well in practice, since many
CPU-specific costs give loads quite a high cost relative to
arithmetic operations. However, stores usually have a cost
of just 1, which means that scatters tend to appear too cheap.
This patch adds a separate cost for one element in a scatter store.
As with the previous patches, this one only becomes active if
a CPU selects use_new_vector_costs. It should therefore have
a very low impact on other CPUs.
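To make the effect concrete, here is a minimal standalone sketch, not GCC code: the struct, the function and the numeric values below are invented for illustration, and only the idea of a dedicated per-element scatter-store cost mirrors the patch. It shows how charging each scattered element a separate cost, rather than the blanket scalar_store cost of 1, lets a scatter look appropriately expensive.

/* Minimal sketch, not GCC code: toy_sve_costs, scatter_store_cost and the
   numbers are invented for illustration.  Only the notion of a separate
   per-element scatter-store cost mirrors the patch.  */
#include <stdio.h>

struct toy_sve_costs
{
  unsigned int scalar_store_cost;      /* cost of one ordinary scalar store */
  unsigned int scatter_store_elt_cost; /* cost of one scatter-store element */
};

/* Old model: every element of a scatter store is charged as a scalar_store.
   New model: each element is charged the dedicated per-element cost.  */
static unsigned int
scatter_store_cost (const struct toy_sve_costs *costs, unsigned int nelts,
                    int use_new_vector_costs)
{
  unsigned int per_elt = use_new_vector_costs
                         ? costs->scatter_store_elt_cost
                         : costs->scalar_store_cost;
  return nelts * per_elt;
}

int
main (void)
{
  /* Hypothetical tuning: plain stores cost 1, each scattered element 3.  */
  struct toy_sve_costs cpu = { 1, 3 };
  printf ("old cost of a 4-element scatter: %u\n",
          scatter_store_cost (&cpu, 4, 0)); /* 4 * 1 = 4  */
  printf ("new cost of a 4-element scatter: %u\n",
          scatter_store_cost (&cpu, 4, 1)); /* 4 * 3 = 12 */
  return 0;
}

In the patch itself, the generic and A64FX tunings simply initialise the new field from the existing scalar_store-equivalent cost of 1, so estimates only change for CPUs that opt in to use_new_vector_costs and supply a higher value.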
gcc/
* config/aarch64/aarch64-protos.h
(sve_vec_cost::scatter_store_elt_cost): New member variable.
* config/aarch64/aarch64.c (generic_sve_vector_cost): Update
accordingly, taking the cost from the cost of a scalar_store.
(a64fx_sve_vector_cost): Likewise.
(aarch64_detect_vector_stmt_subtype): Detect scatter stores.
Diffstat (limited to 'gcc')
-rw-r--r--   gcc/config/aarch64/aarch64-protos.h |  9
-rw-r--r--   gcc/config/aarch64/aarch64.c        | 13
2 files changed, 18 insertions, 4 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index fabe3df..2ffa96e 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -256,12 +256,14 @@ struct sve_vec_cost : simd_vec_cost
 			  unsigned int clast_cost,
 			  unsigned int fadda_f16_cost,
 			  unsigned int fadda_f32_cost,
-			  unsigned int fadda_f64_cost)
+			  unsigned int fadda_f64_cost,
+			  unsigned int scatter_store_elt_cost)
     : simd_vec_cost (base),
       clast_cost (clast_cost),
       fadda_f16_cost (fadda_f16_cost),
       fadda_f32_cost (fadda_f32_cost),
-      fadda_f64_cost (fadda_f64_cost)
+      fadda_f64_cost (fadda_f64_cost),
+      scatter_store_elt_cost (scatter_store_elt_cost)
   {}
 
   /* The cost of a vector-to-scalar CLASTA or CLASTB instruction,
@@ -274,6 +276,9 @@ struct sve_vec_cost : simd_vec_cost
   const int fadda_f16_cost;
   const int fadda_f32_cost;
   const int fadda_f64_cost;
+
+  /* The per-element cost of a scatter store.  */
+  const int scatter_store_elt_cost;
 };
 
 /* Cost for vector insn classes.  */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 20bb75b..7f72741 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -638,7 +638,8 @@ static const sve_vec_cost generic_sve_vector_cost =
   2, /* clast_cost */
   2, /* fadda_f16_cost */
   2, /* fadda_f32_cost */
-  2 /* fadda_f64_cost */
+  2, /* fadda_f64_cost */
+  1 /* scatter_store_elt_cost */
 };
 
 /* Generic costs for vector insn classes.  */
@@ -705,7 +706,8 @@ static const sve_vec_cost a64fx_sve_vector_cost =
   13, /* clast_cost */
   13, /* fadda_f16_cost */
   13, /* fadda_f32_cost */
-  13 /* fadda_f64_cost */
+  13, /* fadda_f64_cost */
+  1 /* scatter_store_elt_cost */
 };
 
 static const struct cpu_vector_cost a64fx_vector_cost =
@@ -14279,6 +14281,13 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
       && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
     return simd_costs->store_elt_extra_cost;
 
+  /* Detect cases in which a scalar_store is really storing one element
+     in a scatter operation.  */
+  if (kind == scalar_store
+      && sve_costs
+      && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+    return sve_costs->scatter_store_elt_cost;
+
   /* Detect cases in which vec_to_scalar represents an in-loop reduction.  */
   if (kind == vec_to_scalar
       && where == vect_body