From 21416caf221fae4351319ef8ca8d41c0234bdfa7 Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Tue, 24 Oct 2023 11:01:52 +0100
Subject: aarch64: Define TARGET_INSN_COST

This patch adds a bare-bones TARGET_INSN_COST. See the comment
in the patch for the rationale.

Just to get a flavour for how much difference it makes, I tried
compiling the testsuite with -Os -fno-schedule-insns{,2} and
seeing what effect the patch had on the number of instructions.
Very few tests changed, but all the changes were positive:

  Tests   Good    Bad  Delta   Best  Worst Median
  =====   ====    ===  =====   ====  ===== ======
     19     19      0   -177    -52     -1     -4

The change for -O2 was even smaller, but more mixed:

  Tests   Good    Bad  Delta   Best  Worst Median
  =====   ====    ===  =====   ====  ===== ======
      6      3      3     -8     -9      6     -2

There were no obvious effects on SPEC CPU2017.

The patch is needed to avoid a regression with a later change.

gcc/
	* config/aarch64/aarch64.cc (aarch64_insn_cost): New function.
	(TARGET_INSN_COST): Define.
---
 gcc/config/aarch64/aarch64.cc | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index a28b66a..aec9b76 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -15541,6 +15541,28 @@ aarch64_memory_move_cost (machine_mode mode, reg_class_t rclass_i, bool in)
 	  : aarch64_tune_params.memmov_cost.store_int);
 }
 
+/* Implement TARGET_INSN_COST. We have the opportunity to do something
+   much more productive here, such as using insn attributes to cost things.
+   But we don't, not yet.
+
+   The main point of this current definition is to make calling insn_cost
+   on one instruction equivalent to calling seq_cost on a sequence that
+   contains only that instruction. The default definition would instead
+   only look at SET_SRCs, ignoring SET_DESTs.
+
+   This ensures that, for example, storing a 128-bit zero vector is more
+   expensive than storing a 128-bit vector register. A move of zero
+   into a 128-bit vector register followed by multiple stores of that
+   register is then cheaper than multiple stores of zero (which would
+   use STP of XZR). This in turn allows STP Qs to be formed. */
+static int
+aarch64_insn_cost (rtx_insn *insn, bool speed)
+{
+  if (rtx set = single_set (insn))
+    return set_rtx_cost (set, speed);
+  return pattern_cost (PATTERN (insn), speed);
+}
+
 /* Implement TARGET_INIT_BUILTINS. */
 static void
 aarch64_init_builtins ()
@@ -28399,6 +28421,9 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_RTX_COSTS
 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
 
+#undef TARGET_INSN_COST
+#define TARGET_INSN_COST aarch64_insn_cost
+
 #undef TARGET_SCALAR_MODE_SUPPORTED_P
 #define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p
 
-- 
cgit v1.1