author    Kyrylo Tkachov <kyrylo.tkachov@arm.com>  2020-12-01 14:53:30 +0000
committer Kyrylo Tkachov <kyrylo.tkachov@arm.com>  2020-12-09 14:05:30 +0000
commit    76e4f444a6eb681a60ffae17a10f55631ba9bf69 (patch)
tree      000e8e86bf8369c24009a1d60c8466abc105f578 /gcc
parent    cf97b970fe598740ee0833729bcdce5dc3913f2d (diff)
aarch64: Add CPU-specific SVE vector costs struct
This patch extends the backend vector cost structures to allow for separate
Advanced SIMD and SVE costs.  The fields in the current cpu_vector_cost that
would vary between the two ISAs are moved into a simd_vec_cost struct, and we
have two typedefs of it: advsimd_vec_cost and sve_vec_cost.  If, in the
future, SVE needs extra fields it could inherit from simd_vec_cost.

The CPU vector cost tables in aarch64.c are updated for the struct changes.
aarch64_builtin_vectorization_cost is updated to select either the Advanced
SIMD or the SVE costs field depending on the mode and field availability.

No change in codegen is intended with this patch.

gcc/
	* config/aarch64/aarch64-protos.h (cpu_vector_cost): Move simd
	fields to...
	(simd_vec_cost): ... Here.  Define.
	(advsimd_vec_cost): Define.
	(sve_vec_cost): Define.
	* config/aarch64/aarch64.c (generic_advsimd_vector_cost): Define.
	(generic_sve_vector_cost): Likewise.
	(generic_vector_cost): Update.
	(qdf24xx_advsimd_vector_cost): Define.
	(qdf24xx_vector_cost): Update.
	(thunderx_advsimd_vector_cost): Define.
	(thunderx_vector_cost): Update.
	(tsv110_advsimd_vector_cost): Define.
	(tsv110_vector_cost): Update.
	(cortexa57_advsimd_vector_cost): Define.
	(cortexa57_vector_cost): Update.
	(exynosm1_advsimd_vector_cost): Define.
	(exynosm1_vector_cost): Update.
	(xgene1_advsimd_vector_cost): Define.
	(xgene1_vector_cost): Update.
	(thunderx2t99_advsimd_vector_cost): Define.
	(thunderx2t99_vector_cost): Update.
	(thunderx3t110_advsimd_vector_cost): Define.
	(thunderx3t110_vector_cost): Update.
	(aarch64_builtin_vectorization_cost): Handle sve and advsimd
	vector cost fields.
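[Editorial note, not part of the patch: a minimal standalone C sketch of the
dispatch pattern the patch introduces.  Per-ISA cost tables share one struct
type, and SVE modes fall back to the Advanced SIMD table when a CPU provides
no SVE-specific costs.  The struct, typedef and field names follow the patch;
the trimmed-down tables, the is_sve_mode flag (a stand-in for GCC's
aarch64_sve_mode_p check) and main are illustrative only.]

	#include <stdio.h>

	/* Shared per-ISA cost fields (trimmed to two for brevity).  */
	struct simd_vec_cost
	{
	  int int_stmt_cost;
	  int fp_stmt_cost;
	};

	typedef struct simd_vec_cost advsimd_vec_cost;
	typedef struct simd_vec_cost sve_vec_cost;

	struct cpu_vector_cost
	{
	  const advsimd_vec_cost *advsimd;
	  const sve_vec_cost *sve;	/* NULL if no SVE-specific tuning.  */
	};

	static const advsimd_vec_cost generic_advsimd = { 1, 1 };
	static const sve_vec_cost generic_sve = { 1, 1 };

	static const struct cpu_vector_cost generic_costs
	  = { &generic_advsimd, &generic_sve };
	static const struct cpu_vector_cost no_sve_costs
	  = { &generic_advsimd, NULL };	/* e.g. thunderx-style entry.  */

	/* Mirrors the selection logic added to
	   aarch64_builtin_vectorization_cost: prefer the SVE table for
	   SVE modes, fall back to Advanced SIMD when it is absent.  */
	static const struct simd_vec_cost *
	select_simd_costs (const struct cpu_vector_cost *costs, int is_sve_mode)
	{
	  if (is_sve_mode && costs->sve != NULL)
	    return costs->sve;
	  return costs->advsimd;
	}

	int
	main (void)
	{
	  /* Both print 1: the second CPU falls back to Advanced SIMD.  */
	  printf ("%d\n", select_simd_costs (&generic_costs, 1)->int_stmt_cost);
	  printf ("%d\n", select_simd_costs (&no_sve_costs, 1)->int_stmt_cost);
	  return 0;
	}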
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/aarch64/aarch64-protos.h |  41
-rw-r--r-- gcc/config/aarch64/aarch64.c        | 279
2 files changed, 204 insertions(+), 116 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 2aa3f1f..c847966 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -192,6 +192,29 @@ struct cpu_regmove_cost
const int FP2FP;
};
+struct simd_vec_cost
+{
+ const int int_stmt_cost; /* Cost of any int vector operation,
+ excluding load, store, permute,
+ vector-to-scalar and
+ scalar-to-vector operation. */
+ const int fp_stmt_cost; /* Cost of any fp vector operation,
+ excluding load, store, permute,
+ vector-to-scalar and
+ scalar-to-vector operation. */
+ const int permute_cost; /* Cost of permute operation. */
+ const int vec_to_scalar_cost; /* Cost of vec-to-scalar operation. */
+ const int scalar_to_vec_cost; /* Cost of scalar-to-vector
+ operation. */
+ const int align_load_cost; /* Cost of aligned vector load. */
+ const int unalign_load_cost; /* Cost of unaligned vector load. */
+ const int unalign_store_cost; /* Cost of unaligned vector store. */
+ const int store_cost; /* Cost of vector store. */
+};
+
+typedef struct simd_vec_cost advsimd_vec_cost;
+typedef struct simd_vec_cost sve_vec_cost;
+
/* Cost for vector insn classes. */
struct cpu_vector_cost
{
@@ -201,24 +224,10 @@ struct cpu_vector_cost
excluding load and store. */
const int scalar_load_cost; /* Cost of scalar load. */
const int scalar_store_cost; /* Cost of scalar store. */
- const int vec_int_stmt_cost; /* Cost of any int vector operation,
- excluding load, store, permute,
- vector-to-scalar and
- scalar-to-vector operation. */
- const int vec_fp_stmt_cost; /* Cost of any fp vector operation,
- excluding load, store, permute,
- vector-to-scalar and
- scalar-to-vector operation. */
- const int vec_permute_cost; /* Cost of permute operation. */
- const int vec_to_scalar_cost; /* Cost of vec-to-scalar operation. */
- const int scalar_to_vec_cost; /* Cost of scalar-to-vector
- operation. */
- const int vec_align_load_cost; /* Cost of aligned vector load. */
- const int vec_unalign_load_cost; /* Cost of unaligned vector load. */
- const int vec_unalign_store_cost; /* Cost of unaligned vector store. */
- const int vec_store_cost; /* Cost of vector store. */
const int cond_taken_branch_cost; /* Cost of taken branch. */
const int cond_not_taken_branch_cost; /* Cost of not taken branch. */
+ const advsimd_vec_cost *advsimd; /* Cost of Advanced SIMD operations. */
+ const sve_vec_cost *sve; /* Cost of SVE operations. */
};
/* Branch costs. */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 67ffba0..b796301 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -559,6 +559,34 @@ static const struct cpu_regmove_cost tsv110_regmove_cost =
2 /* FP2FP */
};
+/* Generic costs for Advanced SIMD vector operations. */
+static const advsimd_vec_cost generic_advsimd_vector_cost =
+{
+ 1, /* int_stmt_cost */
+ 1, /* fp_stmt_cost */
+ 2, /* permute_cost */
+ 2, /* vec_to_scalar_cost */
+ 1, /* scalar_to_vec_cost */
+ 1, /* align_load_cost */
+ 1, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
+};
+
+/* Generic costs for SVE vector operations. */
+static const sve_vec_cost generic_sve_vector_cost =
+{
+ 1, /* int_stmt_cost */
+ 1, /* fp_stmt_cost */
+ 2, /* permute_cost */
+ 2, /* vec_to_scalar_cost */
+ 1, /* scalar_to_vec_cost */
+ 1, /* align_load_cost */
+ 1, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
+};
+
/* Generic costs for vector insn classes. */
static const struct cpu_vector_cost generic_vector_cost =
{
@@ -566,17 +594,23 @@ static const struct cpu_vector_cost generic_vector_cost =
1, /* scalar_fp_stmt_cost */
1, /* scalar_load_cost */
1, /* scalar_store_cost */
- 1, /* vec_int_stmt_cost */
- 1, /* vec_fp_stmt_cost */
- 2, /* vec_permute_cost */
- 2, /* vec_to_scalar_cost */
- 1, /* scalar_to_vec_cost */
- 1, /* vec_align_load_cost */
- 1, /* vec_unalign_load_cost */
- 1, /* vec_unalign_store_cost */
- 1, /* vec_store_cost */
3, /* cond_taken_branch_cost */
- 1 /* cond_not_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &generic_advsimd_vector_cost, /* advsimd */
+ &generic_sve_vector_cost /* sve */
+};
+
+static const advsimd_vec_cost qdf24xx_advsimd_vector_cost =
+{
+ 1, /* int_stmt_cost */
+ 3, /* fp_stmt_cost */
+ 2, /* permute_cost */
+ 1, /* vec_to_scalar_cost */
+ 1, /* scalar_to_vec_cost */
+ 1, /* align_load_cost */
+ 1, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
};
/* QDF24XX costs for vector insn classes. */
@@ -586,17 +620,24 @@ static const struct cpu_vector_cost qdf24xx_vector_cost =
1, /* scalar_fp_stmt_cost */
1, /* scalar_load_cost */
1, /* scalar_store_cost */
- 1, /* vec_int_stmt_cost */
- 3, /* vec_fp_stmt_cost */
- 2, /* vec_permute_cost */
- 1, /* vec_to_scalar_cost */
- 1, /* scalar_to_vec_cost */
- 1, /* vec_align_load_cost */
- 1, /* vec_unalign_load_cost */
- 1, /* vec_unalign_store_cost */
- 1, /* vec_store_cost */
3, /* cond_taken_branch_cost */
- 1 /* cond_not_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &qdf24xx_advsimd_vector_cost, /* advsimd */
+ NULL /* sve */
+};
+
+
+static const advsimd_vec_cost thunderx_advsimd_vector_cost =
+{
+ 4, /* int_stmt_cost */
+ 1, /* fp_stmt_cost */
+ 4, /* permute_cost */
+ 2, /* vec_to_scalar_cost */
+ 2, /* scalar_to_vec_cost */
+ 3, /* align_load_cost */
+ 5, /* unalign_load_cost */
+ 5, /* unalign_store_cost */
+ 1 /* store_cost */
};
/* ThunderX costs for vector insn classes. */
@@ -606,17 +647,23 @@ static const struct cpu_vector_cost thunderx_vector_cost =
1, /* scalar_fp_stmt_cost */
3, /* scalar_load_cost */
1, /* scalar_store_cost */
- 4, /* vec_int_stmt_cost */
- 1, /* vec_fp_stmt_cost */
- 4, /* vec_permute_cost */
- 2, /* vec_to_scalar_cost */
- 2, /* scalar_to_vec_cost */
- 3, /* vec_align_load_cost */
- 5, /* vec_unalign_load_cost */
- 5, /* vec_unalign_store_cost */
- 1, /* vec_store_cost */
3, /* cond_taken_branch_cost */
- 3 /* cond_not_taken_branch_cost */
+ 3, /* cond_not_taken_branch_cost */
+ &thunderx_advsimd_vector_cost, /* advsimd */
+ NULL /* sve */
+};
+
+static const advsimd_vec_cost tsv110_advsimd_vector_cost =
+{
+ 2, /* int_stmt_cost */
+ 2, /* fp_stmt_cost */
+ 2, /* permute_cost */
+ 3, /* vec_to_scalar_cost */
+ 2, /* scalar_to_vec_cost */
+ 5, /* align_load_cost */
+ 5, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
};
static const struct cpu_vector_cost tsv110_vector_cost =
@@ -625,37 +672,49 @@ static const struct cpu_vector_cost tsv110_vector_cost =
1, /* scalar_fp_stmt_cost */
5, /* scalar_load_cost */
1, /* scalar_store_cost */
- 2, /* vec_int_stmt_cost */
- 2, /* vec_fp_stmt_cost */
- 2, /* vec_permute_cost */
- 3, /* vec_to_scalar_cost */
- 2, /* scalar_to_vec_cost */
- 5, /* vec_align_load_cost */
- 5, /* vec_unalign_load_cost */
- 1, /* vec_unalign_store_cost */
- 1, /* vec_store_cost */
1, /* cond_taken_branch_cost */
- 1 /* cond_not_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &tsv110_advsimd_vector_cost, /* advsimd */
+ NULL, /* sve */
};
-/* Generic costs for vector insn classes. */
+static const advsimd_vec_cost cortexa57_advsimd_vector_cost =
+{
+ 2, /* int_stmt_cost */
+ 2, /* fp_stmt_cost */
+ 3, /* permute_cost */
+ 8, /* vec_to_scalar_cost */
+ 8, /* scalar_to_vec_cost */
+ 4, /* align_load_cost */
+ 4, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
+};
+
+/* Cortex-A57 costs for vector insn classes. */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
1, /* scalar_int_stmt_cost */
1, /* scalar_fp_stmt_cost */
4, /* scalar_load_cost */
1, /* scalar_store_cost */
- 2, /* vec_int_stmt_cost */
- 2, /* vec_fp_stmt_cost */
- 3, /* vec_permute_cost */
- 8, /* vec_to_scalar_cost */
- 8, /* scalar_to_vec_cost */
- 4, /* vec_align_load_cost */
- 4, /* vec_unalign_load_cost */
- 1, /* vec_unalign_store_cost */
- 1, /* vec_store_cost */
1, /* cond_taken_branch_cost */
- 1 /* cond_not_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &cortexa57_advsimd_vector_cost, /* advsimd */
+ NULL /* sve */
+};
+
+static const advsimd_vec_cost exynosm1_advsimd_vector_cost =
+{
+ 3, /* int_stmt_cost */
+ 3, /* fp_stmt_cost */
+ 3, /* permute_cost */
+ 3, /* vec_to_scalar_cost */
+ 3, /* scalar_to_vec_cost */
+ 5, /* align_load_cost */
+ 5, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
};
static const struct cpu_vector_cost exynosm1_vector_cost =
@@ -664,17 +723,23 @@ static const struct cpu_vector_cost exynosm1_vector_cost =
1, /* scalar_fp_stmt_cost */
5, /* scalar_load_cost */
1, /* scalar_store_cost */
- 3, /* vec_int_stmt_cost */
- 3, /* vec_fp_stmt_cost */
- 3, /* vec_permute_cost */
- 3, /* vec_to_scalar_cost */
- 3, /* scalar_to_vec_cost */
- 5, /* vec_align_load_cost */
- 5, /* vec_unalign_load_cost */
- 1, /* vec_unalign_store_cost */
- 1, /* vec_store_cost */
1, /* cond_taken_branch_cost */
- 1 /* cond_not_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &exynosm1_advsimd_vector_cost, /* advsimd */
+ NULL /* sve */
+};
+
+static const advsimd_vec_cost xgene1_advsimd_vector_cost =
+{
+ 2, /* int_stmt_cost */
+ 2, /* fp_stmt_cost */
+ 2, /* permute_cost */
+ 4, /* vec_to_scalar_cost */
+ 4, /* scalar_to_vec_cost */
+ 10, /* align_load_cost */
+ 10, /* unalign_load_cost */
+ 2, /* unalign_store_cost */
+ 2 /* store_cost */
};
/* Generic costs for vector insn classes. */
@@ -684,17 +749,23 @@ static const struct cpu_vector_cost xgene1_vector_cost =
1, /* scalar_fp_stmt_cost */
5, /* scalar_load_cost */
1, /* scalar_store_cost */
- 2, /* vec_int_stmt_cost */
- 2, /* vec_fp_stmt_cost */
- 2, /* vec_permute_cost */
- 4, /* vec_to_scalar_cost */
- 4, /* scalar_to_vec_cost */
- 10, /* vec_align_load_cost */
- 10, /* vec_unalign_load_cost */
- 2, /* vec_unalign_store_cost */
- 2, /* vec_store_cost */
2, /* cond_taken_branch_cost */
- 1 /* cond_not_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &xgene1_advsimd_vector_cost, /* advsimd */
+ NULL /* sve */
+};
+
+static const advsimd_vec_cost thunderx2t99_advsimd_vector_cost =
+{
+ 4, /* int_stmt_cost */
+ 5, /* fp_stmt_cost */
+ 10, /* permute_cost */
+ 6, /* vec_to_scalar_cost */
+ 5, /* scalar_to_vec_cost */
+ 4, /* align_load_cost */
+ 4, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
};
/* Costs for vector insn classes for Vulcan. */
@@ -704,17 +775,23 @@ static const struct cpu_vector_cost thunderx2t99_vector_cost =
6, /* scalar_fp_stmt_cost */
4, /* scalar_load_cost */
1, /* scalar_store_cost */
- 4, /* vec_int_stmt_cost */
- 5, /* vec_fp_stmt_cost */
- 10, /* vec_permute_cost */
- 6, /* vec_to_scalar_cost */
- 5, /* scalar_to_vec_cost */
- 4, /* vec_align_load_cost */
- 4, /* vec_unalign_load_cost */
- 1, /* vec_unalign_store_cost */
- 1, /* vec_store_cost */
2, /* cond_taken_branch_cost */
- 1 /* cond_not_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &thunderx2t99_advsimd_vector_cost, /* advsimd */
+ NULL /* sve */
+};
+
+static const advsimd_vec_cost thunderx3t110_advsimd_vector_cost =
+{
+ 5, /* int_stmt_cost */
+ 5, /* fp_stmt_cost */
+ 10, /* permute_cost */
+ 5, /* vec_to_scalar_cost */
+ 5, /* scalar_to_vec_cost */
+ 4, /* align_load_cost */
+ 4, /* unalign_load_cost */
+ 4, /* unalign_store_cost */
+ 4 /* store_cost */
};
static const struct cpu_vector_cost thunderx3t110_vector_cost =
@@ -723,17 +800,10 @@ static const struct cpu_vector_cost thunderx3t110_vector_cost =
5, /* scalar_fp_stmt_cost */
4, /* scalar_load_cost */
1, /* scalar_store_cost */
- 5, /* vec_int_stmt_cost */
- 5, /* vec_fp_stmt_cost */
- 10, /* vec_permute_cost */
- 5, /* vec_to_scalar_cost */
- 5, /* scalar_to_vec_cost */
- 4, /* vec_align_load_cost */
- 4, /* vec_unalign_load_cost */
- 4, /* vec_unalign_store_cost */
- 4, /* vec_store_cost */
2, /* cond_taken_branch_cost */
- 1 /* cond_not_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &thunderx3t110_advsimd_vector_cost, /* advsimd */
+ NULL /* sve */
};
@@ -13712,6 +13782,13 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
if (vectype != NULL)
fp = FLOAT_TYPE_P (vectype);
+ const simd_vec_cost *simd_costs;
+ if (vectype != NULL && aarch64_sve_mode_p (TYPE_MODE (vectype))
+ && costs->sve != NULL)
+ simd_costs = costs->sve;
+ else
+ simd_costs = costs->advsimd;
+
switch (type_of_cost)
{
case scalar_stmt:
@@ -13724,27 +13801,28 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
return costs->scalar_store_cost;
case vector_stmt:
- return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
+ return fp ? simd_costs->fp_stmt_cost
+ : simd_costs->int_stmt_cost;
case vector_load:
- return costs->vec_align_load_cost;
+ return simd_costs->align_load_cost;
case vector_store:
- return costs->vec_store_cost;
+ return simd_costs->store_cost;
case vec_to_scalar:
- return costs->vec_to_scalar_cost;
+ return simd_costs->vec_to_scalar_cost;
case scalar_to_vec:
- return costs->scalar_to_vec_cost;
+ return simd_costs->scalar_to_vec_cost;
case unaligned_load:
case vector_gather_load:
- return costs->vec_unalign_load_cost;
+ return simd_costs->unalign_load_cost;
case unaligned_store:
case vector_scatter_store:
- return costs->vec_unalign_store_cost;
+ return simd_costs->unalign_store_cost;
case cond_branch_taken:
return costs->cond_taken_branch_cost;
@@ -13753,10 +13831,11 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
return costs->cond_not_taken_branch_cost;
case vec_perm:
- return costs->vec_permute_cost;
+ return simd_costs->permute_cost;
case vec_promote_demote:
- return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
+ return fp ? simd_costs->fp_stmt_cost
+ : simd_costs->int_stmt_cost;
case vec_construct:
elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));