aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorDimitar Dimitrov <dimitar@dinux.eu>2023-10-16 20:40:18 +0300
committerDimitar Dimitrov <dimitar@dinux.eu>2023-10-18 20:16:20 +0300
commitfe9767eedcea3e867a48102f96d913fc70670e67 (patch)
tree4e33e0911e97f99a736df942f9aa97de15562572 /gcc
parent67f7bf78ba3bea2c4efe87589714d57ccb1d8f93 (diff)
downloadgcc-fe9767eedcea3e867a48102f96d913fc70670e67.zip
gcc-fe9767eedcea3e867a48102f96d913fc70670e67.tar.gz
gcc-fe9767eedcea3e867a48102f96d913fc70670e67.tar.bz2
pru: Implement TARGET_INSN_COST
This patch slightly improves the embench-iot benchmark score for
PRU code size.  There is also a small improvement in a few real-world
firmware programs.

  Embench-iot size
  ------------------------------------------
  Benchmark          before   after    delta
  ---------            ----    ----    -----
  aha-mont64           4.15    4.15        0
  crc32                6.04    6.04        0
  cubic               21.64   21.62    -0.02
  edn                  6.37    6.37        0
  huffbench           18.63   18.55    -0.08
  matmult-int          5.44    5.44        0
  md5sum              25.56   25.43    -0.13
  minver              12.82   12.76    -0.06
  nbody               15.09   14.97    -0.12
  nettle-aes           4.75    4.75        0
  nettle-sha256        4.67    4.67        0
  nsichneu             3.77    3.77        0
  picojpeg             4.11    4.11        0
  primecount           7.90    7.90        0
  qrduino              7.18    7.16    -0.02
  sglib-combined      13.63   13.59    -0.04
  slre                 5.19    5.19        0
  st                  14.23   14.12    -0.11
  statemate            2.34    2.34        0
  tarfind             36.85   36.64    -0.21
  ud                  10.51   10.46    -0.05
  wikisort             7.44    7.41    -0.03
  ---------           -----   -----
  Geometric mean       8.42    8.40    -0.02
  Geometric SD         2.00    2.00        0
  Geometric range     12.68   12.62    -0.06

gcc/ChangeLog:

	* config/pru/pru.cc (pru_insn_cost): New function.
	(TARGET_INSN_COST): Define for PRU.

Signed-off-by: Dimitar Dimitrov <dimitar@dinux.eu>
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/pru/pru.cc36
1 files changed, 36 insertions, 0 deletions
diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc
index 6e8112b..fd1924e 100644
--- a/gcc/config/pru/pru.cc
+++ b/gcc/config/pru/pru.cc
@@ -783,6 +783,39 @@ pru_rtx_costs (rtx x, machine_mode mode,
}
}
}
+
+/* Implement TARGET_INSN_COST.
+
+   Insn costs on PRU are straightforward because:
+     - Insns emit 0, 1 or more instructions.
+     - All instructions are 32-bit length.
+     - All instructions execute in 1 cycle (sans memory access delays).
+   The "length" attribute maps nicely to the insn cost.
+
+   INSN is the insn to evaluate.  SPEED selects speed costs over size
+   costs; the extra load latency is only accounted for when it is set.
+   Return the cost of INSN in COSTS_N_INSNS units (never 0 for a
+   recognized insn), or the generic pattern cost if INSN is not
+   recognized.  */
+
+static int
+pru_insn_cost (rtx_insn *insn, bool speed)
+{
+  /* Use generic cost calculation for unrecognized insns.  Note that
+     pattern_cost operates on the insn's pattern, not on the insn
+     itself.  Handing it the whole insn would match neither a SET nor
+     a PARALLEL, and the result would always be 0 (i.e. unknown).  */
+  if (recog_memoized (insn) < 0)
+    return pattern_cost (PATTERN (insn), speed);
+
+  unsigned int len = get_attr_length (insn);
+
+  /* The length attribute is in bytes, and every PRU instruction
+     is 4 bytes long.  */
+  gcc_assert ((len % 4) == 0);
+
+  int cost = COSTS_N_INSNS (len / 4);
+  /* Some insns have zero length (e.g. blockage, pruloop_end).
+     In such cases give the minimum cost, because a return of
+     0 would incorrectly indicate that the insn cost is unknown.  */
+  if (cost == 0)
+    cost = 1;
+
+  /* Writes are usually posted, so they take 1 cycle.  Reads
+     from DMEM usually take 3 cycles, hence the two extra insns
+     charged to loads when optimizing for speed.
+     See TI document SPRACE8A, Device-Specific PRU Read Latency Values.  */
+  if (speed && get_attr_type (insn) == TYPE_LD)
+    cost += COSTS_N_INSNS (2);
+
+  return cost;
+}
static GTY(()) rtx eqdf_libfunc;
static GTY(()) rtx nedf_libfunc;
@@ -3175,6 +3208,9 @@ pru_unwind_word_mode (void)
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS pru_rtx_costs
+#undef TARGET_INSN_COST
+#define TARGET_INSN_COST pru_insn_cost
+
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND pru_print_operand