aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@arm.com> 2023-10-24 11:01:52 +0100
committer: Richard Sandiford <richard.sandiford@arm.com> 2023-10-24 11:01:52 +0100
commit: 21416caf221fae4351319ef8ca8d41c0234bdfa7 (patch)
tree: 051c4267cdef6c6835509bbcb472e0dd61faafa0
parent: b632a516a0448818a25f35e15a4bf0a3187af359 (diff)
downloadgcc-21416caf221fae4351319ef8ca8d41c0234bdfa7.zip
gcc-21416caf221fae4351319ef8ca8d41c0234bdfa7.tar.gz
gcc-21416caf221fae4351319ef8ca8d41c0234bdfa7.tar.bz2
aarch64: Define TARGET_INSN_COST
This patch adds a bare-bones TARGET_INSN_COST.  See the comment in the patch for the rationale.

Just to get a flavour for how much difference it makes, I tried compiling the testsuite with -Os -fno-schedule-insns{,2} and seeing what effect the patch had on the number of instructions.  Very few tests changed, but all the changes were positive:

  Tests   Good    Bad   Delta    Best   Worst  Median
  =====   ====    ===   =====    ====   =====  ======
     19     19      0    -177     -52      -1      -4

The change for -O2 was even smaller, but more mixed:

  Tests   Good    Bad   Delta    Best   Worst  Median
  =====   ====    ===   =====    ====   =====  ======
      6      3      3      -8      -9       6      -2

There were no obvious effects on SPEC CPU2017.

The patch is needed to avoid a regression with a later change.

gcc/
	* config/aarch64/aarch64.cc (aarch64_insn_cost): New function.
	(TARGET_INSN_COST): Define.
-rw-r--r--  gcc/config/aarch64/aarch64.cc  25
1 files changed, 25 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index a28b66a..aec9b76 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -15541,6 +15541,28 @@ aarch64_memory_move_cost (machine_mode mode, reg_class_t rclass_i, bool in)
: aarch64_tune_params.memmov_cost.store_int);
}
+/* Implement TARGET_INSN_COST. We have the opportunity to do something
+ much more productive here, such as using insn attributes to cost things.
+ But we don't, not yet.
+
+ The main point of this current definition is to make calling insn_cost
+ on one instruction equivalent to calling seq_cost on a sequence that
+ contains only that instruction. The default definition would instead
+ only look at SET_SRCs, ignoring SET_DESTs.
+
+ This ensures that, for example, storing a 128-bit zero vector is more
+ expensive than storing a 128-bit vector register. A move of zero
+ into a 128-bit vector register followed by multiple stores of that
+ register is then cheaper than multiple stores of zero (which would
+ use STP of XZR). This in turn allows STP Qs to be formed. */
+static int
+aarch64_insn_cost (rtx_insn *insn, bool speed)
+{
+  rtx single = single_set (insn);
+  return (single ? set_rtx_cost (single, speed)
+          : pattern_cost (PATTERN (insn), speed));
+}
+
/* Implement TARGET_INIT_BUILTINS. */
static void
aarch64_init_builtins ()
@@ -28399,6 +28421,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
+#undef TARGET_INSN_COST
+#define TARGET_INSN_COST aarch64_insn_cost
+
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p