about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
authorAndre Vieira <andre.simoesdiasvieira@arm.com>2022-03-22 10:45:23 +0000
committerAndre Vieira <andre.simoesdiasvieira@arm.com>2022-03-22 11:59:21 +0000
commitb074fa69707a891f07f06f0b1a95999447f66149 (patch)
tree7f603eaafc886f2f6883339b3562c54369d216fe /gcc
parenta850930164ebbba2a6e18a39242cd0371b548407 (diff)
downloadgcc-b074fa69707a891f07f06f0b1a95999447f66149.zip
gcc-b074fa69707a891f07f06f0b1a95999447f66149.tar.gz
gcc-b074fa69707a891f07f06f0b1a95999447f66149.tar.bz2
aarch64: Update reg-costs to differentiate between memmove costs
This patch introduces a struct to differentiate between different memmove
costs to enable better modelling of memory operations. These have been
modelled for -mcpu/-mtune=neoverse-v1/neoverse-n1/neoverse-n2/neoverse-512tvb;
for all other tunings all entries are equal to the old single memmove cost,
to ensure the behaviour remains the same.

2022-03-16  Tamar Christina  <tamar.christina@arm.com>
	    Andre Vieira  <andre.simoesdiasvieira@arm.com>

gcc/ChangeLog:

	* config/aarch64/aarch64-protos.h (struct cpu_memmov_cost): New
	struct.
	(struct tune_params): Change type of memmov_cost to use
	cpu_memmov_cost.
	* config/aarch64/aarch64.cc (aarch64_memory_move_cost): Update all
	tunings to use cpu_memmov_cost struct.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/aarch64/aarch64-protos.h15
-rw-r--r--gcc/config/aarch64/aarch64.cc200
2 files changed, 188 insertions, 27 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index d0e78d6..46bade2 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -507,6 +507,18 @@ struct cpu_prefetch_tune
const int default_opt_level;
};
+/* Model the costs for loads/stores for the register allocators so that it can
+ do more accurate spill heuristics. */
+struct cpu_memmov_cost
+{
+ int load_int;
+ int store_int;
+ int load_fp;
+ int store_fp;
+ int load_pred;
+ int store_pred;
+};
+
struct tune_params
{
const struct cpu_cost_table *insn_extra_cost;
@@ -519,7 +531,8 @@ struct tune_params
or SVE_NOT_IMPLEMENTED if not applicable. Only used for tuning
decisions, does not disable VLA vectorization. */
unsigned int sve_width;
- int memmov_cost;
+ /* Structure used by reload to cost spills. */
+ struct cpu_memmov_cost memmov_cost;
int issue_rate;
unsigned int fusible_ops;
const char *function_align;
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index c82b5a6..c10124f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -1297,7 +1297,13 @@ static const struct tune_params generic_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
2, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
"16:12", /* function_align. */
@@ -1326,7 +1332,13 @@ static const struct tune_params cortexa35_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
1, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
@@ -1353,7 +1365,13 @@ static const struct tune_params cortexa53_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
2, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
@@ -1380,7 +1398,13 @@ static const struct tune_params cortexa57_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
@@ -1407,7 +1431,13 @@ static const struct tune_params cortexa72_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
@@ -1434,7 +1464,13 @@ static const struct tune_params cortexa73_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost. */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
2, /* issue_rate. */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
@@ -1463,7 +1499,13 @@ static const struct tune_params exynosm1_tunings =
&generic_branch_cost,
&exynosm1_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC), /* fusible_ops */
"4", /* function_align. */
@@ -1489,7 +1531,13 @@ static const struct tune_params thunderxt88_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 6, /* memmov_cost */
+ { 6, /* load_int. */
+ 6, /* store_int. */
+ 6, /* load_fp. */
+ 6, /* store_fp. */
+ 6, /* load_pred. */
+ 6 /* store_pred. */
+ }, /* memmov_cost. */
2, /* issue_rate */
AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */
"8", /* function_align. */
@@ -1515,7 +1563,13 @@ static const struct tune_params thunderx_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 6, /* memmov_cost */
+ { 6, /* load_int. */
+ 6, /* store_int. */
+ 6, /* load_fp. */
+ 6, /* store_fp. */
+ 6, /* load_pred. */
+ 6 /* store_pred. */
+ }, /* memmov_cost. */
2, /* issue_rate */
AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */
"8", /* function_align. */
@@ -1542,7 +1596,13 @@ static const struct tune_params tsv110_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
| AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
@@ -1569,7 +1629,13 @@ static const struct tune_params xgene1_tunings =
&generic_branch_cost,
&xgene1_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 6, /* memmov_cost */
+ { 6, /* load_int. */
+ 6, /* store_int. */
+ 6, /* load_fp. */
+ 6, /* store_fp. */
+ 6, /* load_pred. */
+ 6 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fusible_ops */
"16", /* function_align. */
@@ -1595,7 +1661,13 @@ static const struct tune_params emag_tunings =
&generic_branch_cost,
&xgene1_approx_modes,
SVE_NOT_IMPLEMENTED,
- 6, /* memmov_cost */
+ { 6, /* load_int. */
+ 6, /* store_int. */
+ 6, /* load_fp. */
+ 6, /* store_fp. */
+ 6, /* load_pred. */
+ 6 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fusible_ops */
"16", /* function_align. */
@@ -1621,7 +1693,13 @@ static const struct tune_params qdf24xx_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */
@@ -1650,7 +1728,13 @@ static const struct tune_params saphira_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */
@@ -1677,7 +1761,13 @@ static const struct tune_params thunderx2t99_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost. */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate. */
(AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
@@ -1704,7 +1794,13 @@ static const struct tune_params thunderx3t110_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost. */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
6, /* issue_rate. */
(AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
@@ -1731,7 +1827,13 @@ static const struct tune_params neoversen1_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 2, /* store_int. */
+ 5, /* load_fp. */
+ 2, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
"32:16", /* function_align. */
@@ -1757,7 +1859,13 @@ static const struct tune_params ampere1_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_NOT_IMPLEMENTED, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
4, /* issue_rate */
(AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_AES_AESMC |
AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_MOVK_MOVK |
@@ -1932,7 +2040,13 @@ static const struct tune_params neoversev1_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_256, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 1, /* store_int. */
+ 6, /* load_fp. */
+ 2, /* store_fp. */
+ 6, /* load_pred. */
+ 1 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
"32:16", /* function_align. */
@@ -2063,7 +2177,13 @@ static const struct tune_params neoverse512tvb_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_128 | SVE_256, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 1, /* store_int. */
+ 6, /* load_fp. */
+ 2, /* store_fp. */
+ 6, /* load_pred. */
+ 1 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
"32:16", /* function_align. */
@@ -2245,7 +2365,13 @@ static const struct tune_params neoversen2_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_128, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 1, /* store_int. */
+ 6, /* load_fp. */
+ 2, /* store_fp. */
+ 6, /* load_pred. */
+ 1 /* store_pred. */
+ }, /* memmov_cost. */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
"32:16", /* function_align. */
@@ -2274,7 +2400,13 @@ static const struct tune_params a64fx_tunings =
&generic_branch_cost,
&generic_approx_modes,
SVE_512, /* sve_width */
- 4, /* memmov_cost */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
7, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
"32", /* function_align. */
@@ -14659,12 +14791,28 @@ aarch64_register_move_cost (machine_mode mode,
return regmove_cost->FP2FP;
}
+/* Implements TARGET_MEMORY_MOVE_COST. */
static int
-aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
- reg_class_t rclass ATTRIBUTE_UNUSED,
- bool in ATTRIBUTE_UNUSED)
+aarch64_memory_move_cost (machine_mode mode, reg_class_t rclass_i, bool in)
{
- return aarch64_tune_params.memmov_cost;
+ enum reg_class rclass = (enum reg_class) rclass_i;
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+ ? reg_classes_intersect_p (rclass, PR_REGS)
+ : reg_class_subset_p (rclass, PR_REGS))
+ return (in
+ ? aarch64_tune_params.memmov_cost.load_pred
+ : aarch64_tune_params.memmov_cost.store_pred);
+
+ if (VECTOR_MODE_P (mode) || FLOAT_MODE_P (mode)
+ ? reg_classes_intersect_p (rclass, FP_REGS)
+ : reg_class_subset_p (rclass, FP_REGS))
+ return (in
+ ? aarch64_tune_params.memmov_cost.load_fp
+ : aarch64_tune_params.memmov_cost.store_fp);
+
+ return (in
+ ? aarch64_tune_params.memmov_cost.load_int
+ : aarch64_tune_params.memmov_cost.store_int);
}
/* Implement TARGET_INIT_BUILTINS. */