author    Richard Sandiford <richard.sandiford@arm.com>  2021-03-26 16:08:31 +0000
committer Richard Sandiford <richard.sandiford@arm.com>  2021-03-26 16:08:31 +0000
commit    d1ff0847b2df6ad21f77e26e7e516643c5aa7d40
tree      440a0b8c313edb0c1413035c48bd295afca6dbe7
parent    b1a831f0dd869543788f08f94dc7ff64df3f2064
aarch64: Add costs for storing one element of a vector
Storing one element of a vector is costed as a vec_to_scalar followed
by a scalar_store.  However, vec_to_scalar is also used for reductions
and for vector-to-GPR moves, which makes it difficult to pick one cost
for them all.  This patch therefore adds a separate cost for extracting
one element of a vector in preparation for storing it out.  The store
itself is still costed separately.

As with the previous patches, this one only becomes active if a CPU
selects use_new_vector_costs.  It should therefore have a very low
impact on other CPUs.

gcc/
	* config/aarch64/aarch64-protos.h
	(simd_vec_cost::store_elt_extra_cost): New member variable.
	* config/aarch64/aarch64.c (generic_advsimd_vector_cost): Update
	accordingly, using the vec_to_scalar cost for the new field.
	(generic_sve_vector_cost, a64fx_advsimd_vector_cost): Likewise.
	(a64fx_sve_vector_cost, qdf24xx_advsimd_vector_cost): Likewise.
	(thunderx_advsimd_vector_cost, tsv110_advsimd_vector_cost):
	Likewise.
	(cortexa57_advsimd_vector_cost, exynosm1_advsimd_vector_cost)
	(xgene1_advsimd_vector_cost, thunderx2t99_advsimd_vector_cost)
	(thunderx3t110_advsimd_vector_cost): Likewise.
	(aarch64_detect_vector_stmt_subtype): Detect single-element
	stores.
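For illustration (not part of the commit; the function name is invented
for the example), the sketch below shows the kind of code affected: a
strided store that the vectorizer can implement elementwise, extracting
each lane from a vector register and storing it with a scalar store.

  /* Illustrative only: a stride-4 store that the vectorizer may
     implement by extracting each vector lane and storing it with a
     scalar store.  Each extract was previously costed as a generic
     vec_to_scalar; with this patch it uses store_elt_extra_cost,
     while the scalar_store part of the cost is unchanged.  */
  void
  strided_store (float *restrict dst, const float *restrict src, int n)
  {
    for (int i = 0; i < n; i++)
      dst[i * 4] = src[i] + 1.0f;
  }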
Diffstat (limited to 'gcc')

 gcc/config/aarch64/aarch64-protos.h |  4 ++++
 gcc/config/aarch64/aarch64.c        | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+), 0 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3d15275..fabe3df 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -224,6 +224,10 @@ struct simd_vec_cost
   const int reduc_f32_cost;
   const int reduc_f64_cost;
 
+  /* Additional cost of storing a single vector element, on top of the
+     normal cost of a scalar store.  */
+  const int store_elt_extra_cost;
+
   /* Cost of a vector-to-scalar operation.  */
   const int vec_to_scalar_cost;
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 8fb723d..20bb75b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -601,6 +601,7 @@ static const advsimd_vec_cost generic_advsimd_vector_cost =
   2, /* reduc_f16_cost */
   2, /* reduc_f32_cost */
   2, /* reduc_f64_cost */
+  2, /* store_elt_extra_cost */
   2, /* vec_to_scalar_cost */
   1, /* scalar_to_vec_cost */
   1, /* align_load_cost */
@@ -626,6 +627,7 @@ static const sve_vec_cost generic_sve_vector_cost =
   2, /* reduc_f16_cost */
   2, /* reduc_f32_cost */
   2, /* reduc_f64_cost */
+  2, /* store_elt_extra_cost */
   2, /* vec_to_scalar_cost */
   1, /* scalar_to_vec_cost */
   1, /* align_load_cost */
@@ -667,6 +669,7 @@ static const advsimd_vec_cost a64fx_advsimd_vector_cost =
   13, /* reduc_f16_cost */
   13, /* reduc_f32_cost */
   13, /* reduc_f64_cost */
+  13, /* store_elt_extra_cost */
   13, /* vec_to_scalar_cost */
   4, /* scalar_to_vec_cost */
   6, /* align_load_cost */
@@ -691,6 +694,7 @@ static const sve_vec_cost a64fx_sve_vector_cost =
   13, /* reduc_f16_cost */
   13, /* reduc_f32_cost */
   13, /* reduc_f64_cost */
+  13, /* store_elt_extra_cost */
   13, /* vec_to_scalar_cost */
   4, /* scalar_to_vec_cost */
   6, /* align_load_cost */
@@ -731,6 +735,7 @@ static const advsimd_vec_cost qdf24xx_advsimd_vector_cost =
   1, /* reduc_f16_cost */
   1, /* reduc_f32_cost */
   1, /* reduc_f64_cost */
+  1, /* store_elt_extra_cost */
   1, /* vec_to_scalar_cost */
   1, /* scalar_to_vec_cost */
   1, /* align_load_cost */
@@ -768,6 +773,7 @@ static const advsimd_vec_cost thunderx_advsimd_vector_cost =
   2, /* reduc_f16_cost */
   2, /* reduc_f32_cost */
   2, /* reduc_f64_cost */
+  2, /* store_elt_extra_cost */
   2, /* vec_to_scalar_cost */
   2, /* scalar_to_vec_cost */
   3, /* align_load_cost */
@@ -804,6 +810,7 @@ static const advsimd_vec_cost tsv110_advsimd_vector_cost =
   3, /* reduc_f16_cost */
   3, /* reduc_f32_cost */
   3, /* reduc_f64_cost */
+  3, /* store_elt_extra_cost */
   3, /* vec_to_scalar_cost */
   2, /* scalar_to_vec_cost */
   5, /* align_load_cost */
@@ -839,6 +846,7 @@ static const advsimd_vec_cost cortexa57_advsimd_vector_cost =
   8, /* reduc_f16_cost */
   8, /* reduc_f32_cost */
   8, /* reduc_f64_cost */
+  8, /* store_elt_extra_cost */
   8, /* vec_to_scalar_cost */
   8, /* scalar_to_vec_cost */
   4, /* align_load_cost */
@@ -875,6 +883,7 @@ static const advsimd_vec_cost exynosm1_advsimd_vector_cost =
   3, /* reduc_f16_cost */
   3, /* reduc_f32_cost */
   3, /* reduc_f64_cost */
+  3, /* store_elt_extra_cost */
   3, /* vec_to_scalar_cost */
   3, /* scalar_to_vec_cost */
   5, /* align_load_cost */
@@ -910,6 +919,7 @@ static const advsimd_vec_cost xgene1_advsimd_vector_cost =
   4, /* reduc_f16_cost */
   4, /* reduc_f32_cost */
   4, /* reduc_f64_cost */
+  4, /* store_elt_extra_cost */
   4, /* vec_to_scalar_cost */
   4, /* scalar_to_vec_cost */
   10, /* align_load_cost */
@@ -946,6 +956,7 @@ static const advsimd_vec_cost thunderx2t99_advsimd_vector_cost =
   6, /* reduc_f16_cost */
   6, /* reduc_f32_cost */
   6, /* reduc_f64_cost */
+  6, /* store_elt_extra_cost */
   6, /* vec_to_scalar_cost */
   5, /* scalar_to_vec_cost */
   4, /* align_load_cost */
@@ -982,6 +993,7 @@ static const advsimd_vec_cost thunderx3t110_advsimd_vector_cost =
   5, /* reduc_f16_cost */
   5, /* reduc_f32_cost */
   5, /* reduc_f64_cost */
+  5, /* store_elt_extra_cost */
   5, /* vec_to_scalar_cost */
   5, /* scalar_to_vec_cost */
   4, /* align_load_cost */
@@ -14259,6 +14271,14 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
   if (aarch64_sve_mode_p (TYPE_MODE (vectype)))
     sve_costs = aarch64_tune_params.vec_costs->sve;
 
+  /* Detect cases in which vec_to_scalar is describing the extraction of a
+     vector element in preparation for a scalar store.  The store itself is
+     costed separately.  */
+  if (kind == vec_to_scalar
+      && STMT_VINFO_DATA_REF (stmt_info)
+      && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
+    return simd_costs->store_elt_extra_cost;
+
   /* Detect cases in which vec_to_scalar represents an in-loop reduction.  */
   if (kind == vec_to_scalar
       && where == vect_body
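The net effect is that a single-element store is now costed as
store_elt_extra_cost plus an ordinary scalar_store, rather than a
generic vec_to_scalar plus scalar_store.  A minimal sketch of that
arithmetic (the helper name is hypothetical, for illustration only):

  /* Hypothetical helper (not part of the patch): the total cost the
     vectorizer attributes to storing one vector lane after this change.
     The lane extract is charged store_elt_extra_cost via the detection
     above; the store itself is still charged as a scalar_store.  */
  static int
  single_element_store_cost (const struct simd_vec_cost *costs,
			     int scalar_store_cost)
  {
    return costs->store_elt_extra_cost + scalar_store_cost;
  }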