diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2021-03-26 16:08:31 +0000 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2021-03-26 16:08:31 +0000 |
commit | d1ff0847b2df6ad21f77e26e7e516643c5aa7d40 (patch) | |
tree | 440a0b8c313edb0c1413035c48bd295afca6dbe7 /gcc | |
parent | b1a831f0dd869543788f08f94dc7ff64df3f2064 (diff) | |
download | gcc-d1ff0847b2df6ad21f77e26e7e516643c5aa7d40.zip gcc-d1ff0847b2df6ad21f77e26e7e516643c5aa7d40.tar.gz gcc-d1ff0847b2df6ad21f77e26e7e516643c5aa7d40.tar.bz2 |
aarch64: Add costs for storing one element of a vector
Storing one element of a vector is costed as a vec_to_scalar
followed by a scalar_store. However, vec_to_scalar is also
used for reductions and for vector-to-GPR moves, which makes
it difficult to pick one cost for them all.
This patch therefore adds a cost for extracting one element
of a vector in preparation for storing it out. The store
itself is still costed separately.
As with the previous patches, this one only becomes active if
a CPU selects use_new_vector_costs. It should therefore have
a very low impact on other CPUs.
gcc/
* config/aarch64/aarch64-protos.h
(simd_vec_cost::store_elt_extra_cost): New member variable.
* config/aarch64/aarch64.c (generic_advsimd_vector_cost): Update
accordingly, using the vec_to_scalar cost for the new field.
(generic_sve_vector_cost, a64fx_advsimd_vector_cost): Likewise.
(a64fx_sve_vector_cost, qdf24xx_advsimd_vector_cost): Likewise.
(thunderx_advsimd_vector_cost, tsv110_advsimd_vector_cost): Likewise.
(cortexa57_advsimd_vector_cost, exynosm1_advsimd_vector_cost)
(xgene1_advsimd_vector_cost, thunderx2t99_advsimd_vector_cost)
(thunderx3t110_advsimd_vector_cost): Likewise.
(aarch64_detect_vector_stmt_subtype): Detect single-element stores.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-protos.h | 4 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 20 |
2 files changed, 24 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 3d15275..fabe3df 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -224,6 +224,10 @@ struct simd_vec_cost const int reduc_f32_cost; const int reduc_f64_cost; + /* Additional cost of storing a single vector element, on top of the + normal cost of a scalar store. */ + const int store_elt_extra_cost; + /* Cost of a vector-to-scalar operation. */ const int vec_to_scalar_cost; diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 8fb723d..20bb75b 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -601,6 +601,7 @@ static const advsimd_vec_cost generic_advsimd_vector_cost = 2, /* reduc_f16_cost */ 2, /* reduc_f32_cost */ 2, /* reduc_f64_cost */ + 2, /* store_elt_extra_cost */ 2, /* vec_to_scalar_cost */ 1, /* scalar_to_vec_cost */ 1, /* align_load_cost */ @@ -626,6 +627,7 @@ static const sve_vec_cost generic_sve_vector_cost = 2, /* reduc_f16_cost */ 2, /* reduc_f32_cost */ 2, /* reduc_f64_cost */ + 2, /* store_elt_extra_cost */ 2, /* vec_to_scalar_cost */ 1, /* scalar_to_vec_cost */ 1, /* align_load_cost */ @@ -667,6 +669,7 @@ static const advsimd_vec_cost a64fx_advsimd_vector_cost = 13, /* reduc_f16_cost */ 13, /* reduc_f32_cost */ 13, /* reduc_f64_cost */ + 13, /* store_elt_extra_cost */ 13, /* vec_to_scalar_cost */ 4, /* scalar_to_vec_cost */ 6, /* align_load_cost */ @@ -691,6 +694,7 @@ static const sve_vec_cost a64fx_sve_vector_cost = 13, /* reduc_f16_cost */ 13, /* reduc_f32_cost */ 13, /* reduc_f64_cost */ + 13, /* store_elt_extra_cost */ 13, /* vec_to_scalar_cost */ 4, /* scalar_to_vec_cost */ 6, /* align_load_cost */ @@ -731,6 +735,7 @@ static const advsimd_vec_cost qdf24xx_advsimd_vector_cost = 1, /* reduc_f16_cost */ 1, /* reduc_f32_cost */ 1, /* reduc_f64_cost */ + 1, /* store_elt_extra_cost */ 1, /* vec_to_scalar_cost */ 1, /* scalar_to_vec_cost */ 1, /* align_load_cost */ 
@@ -768,6 +773,7 @@ static const advsimd_vec_cost thunderx_advsimd_vector_cost = 2, /* reduc_f16_cost */ 2, /* reduc_f32_cost */ 2, /* reduc_f64_cost */ + 2, /* store_elt_extra_cost */ 2, /* vec_to_scalar_cost */ 2, /* scalar_to_vec_cost */ 3, /* align_load_cost */ @@ -804,6 +810,7 @@ static const advsimd_vec_cost tsv110_advsimd_vector_cost = 3, /* reduc_f16_cost */ 3, /* reduc_f32_cost */ 3, /* reduc_f64_cost */ + 3, /* store_elt_extra_cost */ 3, /* vec_to_scalar_cost */ 2, /* scalar_to_vec_cost */ 5, /* align_load_cost */ @@ -839,6 +846,7 @@ static const advsimd_vec_cost cortexa57_advsimd_vector_cost = 8, /* reduc_f16_cost */ 8, /* reduc_f32_cost */ 8, /* reduc_f64_cost */ + 8, /* store_elt_extra_cost */ 8, /* vec_to_scalar_cost */ 8, /* scalar_to_vec_cost */ 4, /* align_load_cost */ @@ -875,6 +883,7 @@ static const advsimd_vec_cost exynosm1_advsimd_vector_cost = 3, /* reduc_f16_cost */ 3, /* reduc_f32_cost */ 3, /* reduc_f64_cost */ + 3, /* store_elt_extra_cost */ 3, /* vec_to_scalar_cost */ 3, /* scalar_to_vec_cost */ 5, /* align_load_cost */ @@ -910,6 +919,7 @@ static const advsimd_vec_cost xgene1_advsimd_vector_cost = 4, /* reduc_f16_cost */ 4, /* reduc_f32_cost */ 4, /* reduc_f64_cost */ + 4, /* store_elt_extra_cost */ 4, /* vec_to_scalar_cost */ 4, /* scalar_to_vec_cost */ 10, /* align_load_cost */ @@ -946,6 +956,7 @@ static const advsimd_vec_cost thunderx2t99_advsimd_vector_cost = 6, /* reduc_f16_cost */ 6, /* reduc_f32_cost */ 6, /* reduc_f64_cost */ + 6, /* store_elt_extra_cost */ 6, /* vec_to_scalar_cost */ 5, /* scalar_to_vec_cost */ 4, /* align_load_cost */ @@ -982,6 +993,7 @@ static const advsimd_vec_cost thunderx3t110_advsimd_vector_cost = 5, /* reduc_f16_cost */ 5, /* reduc_f32_cost */ 5, /* reduc_f64_cost */ + 5, /* store_elt_extra_cost */ 5, /* vec_to_scalar_cost */ 5, /* scalar_to_vec_cost */ 4, /* align_load_cost */ @@ -14259,6 +14271,14 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, if (aarch64_sve_mode_p 
(TYPE_MODE (vectype))) sve_costs = aarch64_tune_params.vec_costs->sve; + /* Detect cases in which vec_to_scalar is describing the extraction of a + vector element in preparation for a scalar store. The store itself is + costed separately. */ + if (kind == vec_to_scalar + && STMT_VINFO_DATA_REF (stmt_info) + && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))) + return simd_costs->store_elt_extra_cost; + /* Detect cases in which vec_to_scalar represents an in-loop reduction. */ if (kind == vec_to_scalar && where == vect_body