aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- gcc/ChangeLog 15
-rw-r--r-- gcc/config/aarch64/aarch64-opts.h 1
-rw-r--r-- gcc/config/aarch64/aarch64-protos.h 4
-rw-r--r-- gcc/config/aarch64/aarch64.c 37
4 files changed, 57 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3976521..3f83c29 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2018-12-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * config/aarch64/aarch64-opts.h (aarch64_sve_vector_bits_enum):
+ Add SVE_NOT_IMPLEMENTED value.
+ * config/aarch64/aarch64-protos.h (struct tune_params): Add sve_width
+ field.
+ * config/aarch64/aarch64.c (generic_tunings, cortexa35_tunings,
+ cortexa53_tunings, cortexa57_tunings, cortexa72_tunings,
+ cortexa73_tunings, exynosm1_tunings, thunderxt88_tunings,
+ thunderx_tunings, tsv110_tunings, xgene1_tunings, qdf24xx_tunings,
+ saphira_tunings, thunderx2t99_tunings, emag_tunings):
+ Specify sve_width.
+ (aarch64_estimated_poly_value): Define.
+ (TARGET_ESTIMATED_POLY_VALUE): Define.
+
2018-12-07 Paul A. Clarke <pc@us.ibm.com>
PR target/88408
diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h
index 7a5c6d7..1ac056b 100644
--- a/gcc/config/aarch64/aarch64-opts.h
+++ b/gcc/config/aarch64/aarch64-opts.h
@@ -84,6 +84,7 @@ enum aarch64_function_type {
/* SVE vector register sizes. */
enum aarch64_sve_vector_bits_enum {
SVE_SCALABLE,
+ SVE_NOT_IMPLEMENTED = SVE_SCALABLE,
SVE_128 = 128,
SVE_256 = 256,
SVE_512 = 512,
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 1fe1a50..4ed886b 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -252,6 +252,10 @@ struct tune_params
const struct cpu_vector_cost *vec_costs;
const struct cpu_branch_cost *branch_costs;
const struct cpu_approx_modes *approx_modes;
+ /* Width of the SVE registers or SVE_NOT_IMPLEMENTED if not applicable.
+ Only used for tuning decisions, does not disable VLA
+ vectorization. */
+ enum aarch64_sve_vector_bits_enum sve_width;
int memmov_cost;
int issue_rate;
unsigned int fusible_ops;
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 490a203..ba9b5ad 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -681,6 +681,7 @@ static const struct tune_params generic_tunings =
&generic_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
2, /* issue_rate */
(AARCH64_FUSE_AES_AESMC), /* fusible_ops */
@@ -706,6 +707,7 @@ static const struct tune_params cortexa35_tunings =
&generic_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
1, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -732,6 +734,7 @@ static const struct tune_params cortexa53_tunings =
&generic_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
2, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -758,6 +761,7 @@ static const struct tune_params cortexa57_tunings =
&cortexa57_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -784,6 +788,7 @@ static const struct tune_params cortexa72_tunings =
&cortexa57_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -810,6 +815,7 @@ static const struct tune_params cortexa73_tunings =
&cortexa57_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost. */
2, /* issue_rate. */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -838,6 +844,7 @@ static const struct tune_params exynosm1_tunings =
&exynosm1_vector_cost,
&generic_branch_cost,
&exynosm1_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC), /* fusible_ops */
@@ -863,6 +870,7 @@ static const struct tune_params thunderxt88_tunings =
&thunderx_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
6, /* memmov_cost */
2, /* issue_rate */
AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
@@ -888,6 +896,7 @@ static const struct tune_params thunderx_tunings =
&thunderx_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
6, /* memmov_cost */
2, /* issue_rate */
AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
@@ -914,6 +923,7 @@ static const struct tune_params tsv110_tunings =
&tsv110_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
4, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
@@ -940,6 +950,7 @@ static const struct tune_params xgene1_tunings =
&xgene1_vector_cost,
&generic_branch_cost,
&xgene1_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
6, /* memmov_cost */
4, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fusible_ops */
@@ -965,6 +976,7 @@ static const struct tune_params emag_tunings =
&xgene1_vector_cost,
&generic_branch_cost,
&xgene1_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
6, /* memmov_cost */
4, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fusible_ops */
@@ -990,6 +1002,7 @@ static const struct tune_params qdf24xx_tunings =
&qdf24xx_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
4, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -1018,6 +1031,7 @@ static const struct tune_params saphira_tunings =
&generic_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
4, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -1044,6 +1058,7 @@ static const struct tune_params thunderx2t99_tunings =
&thunderx2t99_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost. */
4, /* issue_rate. */
(AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
@@ -17869,6 +17884,25 @@ aarch64_speculation_safe_value (machine_mode mode,
return result;
}
+/* Implement TARGET_ESTIMATED_POLY_VALUE.
+ Estimate VAL from the sve_width recorded in aarch64_tune_params:
+ VAL.coeffs[1] is multiplied by the number of 128-bit (VQ) chunks over the
+ initial Advanced SIMD 128 bits and the product is added to VAL.coeffs[0]. */
+
+static HOST_WIDE_INT
+aarch64_estimated_poly_value (poly_int64 val)
+{
+ enum aarch64_sve_vector_bits_enum width_source
+ = aarch64_tune_params.sve_width;
+
+ /* The tuning gives no width (SVE_SCALABLE): use the default estimate. */
+ if (width_source == SVE_SCALABLE)
+ return default_estimated_poly_value (val);
+
+ HOST_WIDE_INT over_128 = width_source - 128;
+ return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
+}
+
/* Target-specific selftests. */
#if CHECKING_P
@@ -18348,6 +18382,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_SPECULATION_SAFE_VALUE
#define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value
+#undef TARGET_ESTIMATED_POLY_VALUE
+#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value
+
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests