author     Richard Sandiford <richard.sandiford@arm.com>   2021-03-26 16:08:32 +0000
committer  Richard Sandiford <richard.sandiford@arm.com>   2021-03-26 16:08:32 +0000
commit     7c679969bac9b7ae5e9446bfaa5466e19063d690 (patch)
tree       667828ec4157edded0b696f774f07e9d2ad4a02c /gcc
parent     d1ff0847b2df6ad21f77e26e7e516643c5aa7d40 (diff)
aarch64: Add costs for one element of a scatter store
Currently each element in a gather load is costed as a scalar_load and
each element in a scatter store is costed as a scalar_store.  The load
side seems to work pretty well in practice, since many CPU-specific
costs give loads quite a high cost relative to arithmetic operations.
However, stores usually have a cost of just 1, which means that
scatters tend to appear too cheap.

This patch adds a separate cost for one element in a scatter store.

As with the previous patches, this one only becomes active if a CPU
selects use_new_vector_costs.  It should therefore have a very low
impact on other CPUs.

gcc/
	* config/aarch64/aarch64-protos.h
	(sve_vec_cost::scatter_store_elt_cost): New member variable.
	* config/aarch64/aarch64.c (generic_sve_vector_cost): Update
	accordingly, taking the cost from the cost of a scalar_store.
	(a64fx_sve_vector_cost): Likewise.
	(aarch64_detect_vector_stmt_subtype): Detect scatter stores.
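A purely illustrative example (not part of the commit) of the kind of loop
this affects: when vectorized for SVE, the indexed store below becomes a
scatter store, and before this patch each stored element was costed as an
ordinary scalar_store with a typical cost of just 1.

    /* Illustrative sketch only.  With SVE autovectorization the indexed
       store turns into a scatter store (a single store with a vector of
       offsets).  With this patch, each stored element is costed using the
       new scatter_store_elt_cost field rather than the scalar_store cost.  */
    void
    scale_indexed (float *restrict dst, const float *restrict src,
                   const int *restrict idx, int n)
    {
      for (int i = 0; i < n; i++)
        dst[idx[i]] = src[i] * 2.0f;
    }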
Diffstat (limited to 'gcc')
 gcc/config/aarch64/aarch64-protos.h |  9 +++++++--
 gcc/config/aarch64/aarch64.c        | 13 +++++++++++--
 2 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index fabe3df..2ffa96e 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -256,12 +256,14 @@ struct sve_vec_cost : simd_vec_cost
unsigned int clast_cost,
unsigned int fadda_f16_cost,
unsigned int fadda_f32_cost,
- unsigned int fadda_f64_cost)
+ unsigned int fadda_f64_cost,
+ unsigned int scatter_store_elt_cost)
: simd_vec_cost (base),
clast_cost (clast_cost),
fadda_f16_cost (fadda_f16_cost),
fadda_f32_cost (fadda_f32_cost),
- fadda_f64_cost (fadda_f64_cost)
+ fadda_f64_cost (fadda_f64_cost),
+ scatter_store_elt_cost (scatter_store_elt_cost)
{}
/* The cost of a vector-to-scalar CLASTA or CLASTB instruction,
@@ -274,6 +276,9 @@ struct sve_vec_cost : simd_vec_cost
const int fadda_f16_cost;
const int fadda_f32_cost;
const int fadda_f64_cost;
+
+ /* The per-element cost of a scatter store. */
+ const int scatter_store_elt_cost;
};
/* Cost for vector insn classes. */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 20bb75b..7f72741 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -638,7 +638,8 @@ static const sve_vec_cost generic_sve_vector_cost =
2, /* clast_cost */
2, /* fadda_f16_cost */
2, /* fadda_f32_cost */
- 2 /* fadda_f64_cost */
+ 2, /* fadda_f64_cost */
+ 1 /* scatter_store_elt_cost */
};
/* Generic costs for vector insn classes. */
@@ -705,7 +706,8 @@ static const sve_vec_cost a64fx_sve_vector_cost =
13, /* clast_cost */
13, /* fadda_f16_cost */
13, /* fadda_f32_cost */
- 13 /* fadda_f64_cost */
+ 13, /* fadda_f64_cost */
+ 1 /* scatter_store_elt_cost */
};
static const struct cpu_vector_cost a64fx_vector_cost =
@@ -14279,6 +14281,13 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
&& DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
return simd_costs->store_elt_extra_cost;
+ /* Detect cases in which a scalar_store is really storing one element
+ in a scatter operation. */
+ if (kind == scalar_store
+ && sve_costs
+ && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ return sve_costs->scatter_store_elt_cost;
+
/* Detect cases in which vec_to_scalar represents an in-loop reduction. */
if (kind == vec_to_scalar
&& where == vect_body