diff options
author | Dimitar Dimitrov <dimitar@dinux.eu> | 2023-10-16 20:40:18 +0300 |
---|---|---|
committer | Dimitar Dimitrov <dimitar@dinux.eu> | 2023-10-18 20:16:20 +0300 |
commit | fe9767eedcea3e867a48102f96d913fc70670e67 (patch) | |
tree | 4e33e0911e97f99a736df942f9aa97de15562572 /gcc | |
parent | 67f7bf78ba3bea2c4efe87589714d57ccb1d8f93 (diff) | |
download | gcc-fe9767eedcea3e867a48102f96d913fc70670e67.zip gcc-fe9767eedcea3e867a48102f96d913fc70670e67.tar.gz gcc-fe9767eedcea3e867a48102f96d913fc70670e67.tar.bz2 |
pru: Implement TARGET_INSN_COST
This patch slightly improves the embench-iot benchmark score for
PRU code size. There is also a small improvement in a few real-world
firmware programs.
Embench-iot size
------------------------------------------
Benchmark before after delta
--------- ---- ---- -----
aha-mont64 4.15 4.15 0
crc32 6.04 6.04 0
cubic 21.64 21.62 -0.02
edn 6.37 6.37 0
huffbench 18.63 18.55 -0.08
matmult-int 5.44 5.44 0
md5sum 25.56 25.43 -0.13
minver 12.82 12.76 -0.06
nbody 15.09 14.97 -0.12
nettle-aes 4.75 4.75 0
nettle-sha256 4.67 4.67 0
nsichneu 3.77 3.77 0
picojpeg 4.11 4.11 0
primecount 7.90 7.90 0
qrduino 7.18 7.16 -0.02
sglib-combined 13.63 13.59 -0.04
slre 5.19 5.19 0
st 14.23 14.12 -0.11
statemate 2.34 2.34 0
tarfind 36.85 36.64 -0.21
ud 10.51 10.46 -0.05
wikisort 7.44 7.41 -0.03
--------- ----- -----
Geometric mean 8.42 8.40 -0.02
Geometric SD 2.00 2.00 0
Geometric range 12.68 12.62 -0.06
gcc/ChangeLog:
* config/pru/pru.cc (pru_insn_cost): New function.
(TARGET_INSN_COST): Define for PRU.
Signed-off-by: Dimitar Dimitrov <dimitar@dinux.eu>
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/pru/pru.cc | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc
index 6e8112b..fd1924e 100644
--- a/gcc/config/pru/pru.cc
+++ b/gcc/config/pru/pru.cc
@@ -783,6 +783,39 @@ pru_rtx_costs (rtx x, machine_mode mode,
         }
     }
 }
+
+/* Implement TARGET_INSN_COST: return the cost of INSN.  Simple on PRU because:
+     - Insns emit 0, 1 or more instructions.
+     - All instructions are 32-bit length.
+     - All instructions execute in 1 cycle (sans memory access delays).
+   The "length" attribute maps nicely to the insn cost.  */
+
+static int
+pru_insn_cost (rtx_insn *insn, bool speed)
+{
+  /* Unrecognized insn: use the generic cost of its pattern (not the insn).  */
+  if (recog_memoized (insn) < 0)
+    return pattern_cost (PATTERN (insn), speed);
+
+  unsigned int len = get_attr_length (insn);
+
+  gcc_assert ((len % 4) == 0);
+
+  int cost = COSTS_N_INSNS (len / 4);
+  /* Some insns have zero length (e.g. blockage, pruloop_end).
+     In such cases give the minimum cost, because a return of
+     0 would incorrectly indicate that the insn cost is unknown.  */
+  if (cost == 0)
+    cost = 1;
+
+  /* Writes are usually posted, so they take 1 cycle.  Reads from DMEM
+     usually take 3 cycles, so loads cost 2 more insns when SPEED is set.
+     See TI document SPRACE8A, Device-Specific PRU Read Latency Values.  */
+  if (speed && get_attr_type (insn) == TYPE_LD)
+    cost += COSTS_N_INSNS (2);
+
+  return cost;
+}
 
 static GTY(()) rtx eqdf_libfunc;
 static GTY(()) rtx nedf_libfunc;
@@ -3175,6 +3208,9 @@ pru_unwind_word_mode (void)
 #undef TARGET_RTX_COSTS
 #define TARGET_RTX_COSTS pru_rtx_costs
 
+#undef TARGET_INSN_COST
+#define TARGET_INSN_COST pru_insn_cost
+
 #undef TARGET_PRINT_OPERAND
 #define TARGET_PRINT_OPERAND pru_print_operand
  |