diff options
author | Xi Ruoyao <xry111@xry111.site> | 2023-12-09 17:41:32 +0800 |
---|---|---|
committer | Xi Ruoyao <xry111@xry111.site> | 2023-12-17 19:23:28 +0800 |
commit | 50b3f596bd943ec6110c1987f14e5497ce39622f (patch) | |
tree | 6522d5b2bf963409ac456d0ee01b3a7563e588bf | |
parent | 5f3f11279514afaa14b4ca382c969b48fd250832 (diff) | |
download | gcc-50b3f596bd943ec6110c1987f14e5497ce39622f.zip gcc-50b3f596bd943ec6110c1987f14e5497ce39622f.tar.gz gcc-50b3f596bd943ec6110c1987f14e5497ce39622f.tar.bz2 |
LoongArch: Fix instruction costs [PR112936]
Replace the instruction costs in loongarch_rtx_cost_data constructor
based on micro-benchmark results on LA464 and LA664.
This allows optimizations like "x * 17" to alsl, and "x * 68" to alsl
and slli.
gcc/ChangeLog:
PR target/112936
* config/loongarch/loongarch-def.cc
(loongarch_rtx_cost_data::loongarch_rtx_cost_data): Update
instruction costs per micro-benchmark results.
(loongarch_rtx_cost_optimize_size): Set all instruction costs
to (COSTS_N_INSNS (1) + 1).
* config/loongarch/loongarch.cc (loongarch_rtx_costs): Remove
special case for multiplication when optimizing for size.
Adjust division cost when TARGET_64BIT && !TARGET_DIV32.
Account the extra cost when TARGET_CHECK_ZERO_DIV and
optimizing for speed.
gcc/testsuite/ChangeLog
PR target/112936
* gcc.target/loongarch/mul-const-reduction.c: New test.
-rw-r--r-- | gcc/config/loongarch/loongarch-def.cc | 39 | ||||
-rw-r--r-- | gcc/config/loongarch/loongarch.cc | 22 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c | 11 |
3 files changed, 43 insertions, 29 deletions
diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc index 6217b19..4a8885e 100644 --- a/gcc/config/loongarch/loongarch-def.cc +++ b/gcc/config/loongarch/loongarch-def.cc @@ -92,15 +92,15 @@ array_tune<loongarch_align> loongarch_cpu_align = /* Default RTX cost initializer. */ loongarch_rtx_cost_data::loongarch_rtx_cost_data () - : fp_add (COSTS_N_INSNS (1)), - fp_mult_sf (COSTS_N_INSNS (2)), - fp_mult_df (COSTS_N_INSNS (4)), - fp_div_sf (COSTS_N_INSNS (6)), + : fp_add (COSTS_N_INSNS (5)), + fp_mult_sf (COSTS_N_INSNS (5)), + fp_mult_df (COSTS_N_INSNS (5)), + fp_div_sf (COSTS_N_INSNS (8)), fp_div_df (COSTS_N_INSNS (8)), - int_mult_si (COSTS_N_INSNS (1)), - int_mult_di (COSTS_N_INSNS (1)), - int_div_si (COSTS_N_INSNS (4)), - int_div_di (COSTS_N_INSNS (6)), + int_mult_si (COSTS_N_INSNS (4)), + int_mult_di (COSTS_N_INSNS (4)), + int_div_si (COSTS_N_INSNS (5)), + int_div_di (COSTS_N_INSNS (5)), branch_cost (6), memory_latency (4) {} @@ -111,18 +111,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data () array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data = array_tune<loongarch_rtx_cost_data> (); -/* RTX costs to use when optimizing for size. */ +/* RTX costs to use when optimizing for size. + We use a value slightly larger than COSTS_N_INSNS (1) for all of them + because they are slower than simple instructions. */ +#define COST_COMPLEX_INSN (COSTS_N_INSNS (1) + 1) const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = loongarch_rtx_cost_data () - .fp_add_ (4) - .fp_mult_sf_ (4) - .fp_mult_df_ (4) - .fp_div_sf_ (4) - .fp_div_df_ (4) - .int_mult_si_ (4) - .int_mult_di_ (4) - .int_div_si_ (4) - .int_div_di_ (4); + .fp_add_ (COST_COMPLEX_INSN) + .fp_mult_sf_ (COST_COMPLEX_INSN) + .fp_mult_df_ (COST_COMPLEX_INSN) + .fp_div_sf_ (COST_COMPLEX_INSN) + .fp_div_df_ (COST_COMPLEX_INSN) + .int_mult_si_ (COST_COMPLEX_INSN) + .int_mult_di_ (COST_COMPLEX_INSN) + .int_div_si_ (COST_COMPLEX_INSN) + .int_div_di_ (COST_COMPLEX_INSN); array_tune<int> loongarch_cpu_issue_rate = array_tune<int> () .set (CPU_NATIVE, 4) diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 860e6e8..390e320 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -3801,8 +3801,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, *total = (speed ? loongarch_cost->int_mult_si * 3 + 6 : COSTS_N_INSNS (7)); - else if (!speed) - *total = COSTS_N_INSNS (1) + 1; else if (mode == DImode) *total = loongarch_cost->int_mult_di; else @@ -3837,14 +3835,18 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, case UDIV: case UMOD: - if (!speed) - { - *total = COSTS_N_INSNS (loongarch_idiv_insns (mode)); - } - else if (mode == DImode) + if (mode == DImode) *total = loongarch_cost->int_div_di; else - *total = loongarch_cost->int_div_si; + { + *total = loongarch_cost->int_div_si; + if (TARGET_64BIT && !TARGET_DIV32) + *total += COSTS_N_INSNS (2); + } + + if (TARGET_CHECK_ZERO_DIV) + *total += COSTS_N_INSNS (2); + return false; case SIGN_EXTEND: @@ -3876,9 +3878,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)))) { - if (!speed) - *total = COSTS_N_INSNS (1) + 1; - else if (mode == DImode) + if (mode == DImode) *total = loongarch_cost->int_mult_di; else *total = loongarch_cost->int_mult_si; diff --git a/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c new file mode 100644 index 0000000..02d9a48 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=la464" } */ +/* { dg-final { scan-assembler "alsl\.w" } } */ +/* { dg-final { scan-assembler "slli\.w" } } */ +/* { dg-final { scan-assembler-not "mul\.w" } } */ + +int +test (int a) +{ + return a * 68; +} |