aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorChristophe Lyon <christophe.lyon@linaro.org>2013-02-12 14:53:45 +0000
committerChristophe Lyon <clyon@gcc.gnu.org>2013-02-12 15:53:45 +0100
commit2597da229a2261de8576099213ffdef4bf946a7c (patch)
tree632f166a13b2e090eba199f0717558186acc354e /gcc
parent43320568b265d6f0448e6f8ae7e6a8ace3a13be0 (diff)
downloadgcc-2597da229a2261de8576099213ffdef4bf946a7c.zip
gcc-2597da229a2261de8576099213ffdef4bf946a7c.tar.gz
gcc-2597da229a2261de8576099213ffdef4bf946a7c.tar.bz2
arm-protos.h (struct cpu_vec_costs): New struct type.
2013-02-05 Christophe Lyon <christophe.lyon@linaro.org> * config/arm/arm-protos.h (struct cpu_vec_costs): New struct type. (struct tune_params): Add vec_costs field. * config/arm/arm.c (arm_builtin_vectorization_cost) (arm_add_stmt_cost): New functions. (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST) (TARGET_VECTORIZE_ADD_STMT_COST): Define. (arm_default_vec_cost): New struct of type cpu_vec_costs. (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune) (arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune) (arm_cortex_a15_tune, arm_cortex_a5_tune, arm_cortex_a9_tune) (arm_v6m_tune, arm_fa726te_tune): Define new vec_costs field. From-SVN: r195977
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog14
-rw-r--r--gcc/config/arm/arm-protos.h23
-rw-r--r--gcc/config/arm/arm.c133
3 files changed, 170 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a0ced1d..0be8139 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+2013-02-12 Christophe Lyon <christophe.lyon@linaro.org>
+
+ * config/arm/arm-protos.h (struct cpu_vec_costs): New struct type.
+ (struct tune_params): Add vec_costs field.
+ * config/arm/arm.c (arm_builtin_vectorization_cost)
+ (arm_add_stmt_cost): New functions.
+ (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST)
+ (TARGET_VECTORIZE_ADD_STMT_COST): Define.
+ (arm_default_vec_cost): New struct of type cpu_vec_costs.
+ (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune)
+ (arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune)
+ (arm_cortex_a15_tune, arm_cortex_a5_tune, arm_cortex_a9_tune)
+ (arm_v6m_tune, arm_fa726te_tune): Define new vec_costs field.
+
2013-02-12 Richard Biener <rguenther@suse.de>
PR lto/56295
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index e9f74dc..ffa00c0 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -224,6 +224,27 @@ extern const char *arm_mangle_type (const_tree);
extern void arm_order_regs_for_local_alloc (void);
+/* Vectorizer cost model: per-CPU costs of each statement kind. */
+struct cpu_vec_costs {
+ const int scalar_stmt_cost; /* Cost of any scalar operation, excluding
+ load and store. */
+ const int scalar_load_cost; /* Cost of scalar load. */
+ const int scalar_store_cost; /* Cost of scalar store. */
+ const int vec_stmt_cost; /* Cost of any vector operation, excluding
+ load, store, vector-to-scalar and
+ scalar-to-vector operation. */
+ const int vec_to_scalar_cost; /* Cost of vector-to-scalar operation. */
+ const int scalar_to_vec_cost; /* Cost of scalar-to-vector operation. */
+ const int vec_align_load_cost; /* Cost of aligned vector load. */
+ const int vec_unalign_load_cost; /* Cost of unaligned vector load. */
+ const int vec_unalign_store_cost; /* Cost of unaligned vector store. */
+ const int vec_store_cost; /* Cost of vector store. */
+ const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer
+ cost model. */
+ const int cond_not_taken_branch_cost;/* Cost of not taken branch for
+ vectorizer cost model. */
+};
+
#ifdef RTX_CODE
/* This needs to be here because we need RTX_CODE and similar. */
@@ -246,6 +267,8 @@ struct tune_params
performance. The first element covers Thumb state and the second one
is for ARM state. */
bool logical_op_non_short_circuit[2];
+ /* Vectorizer costs. */
+ const struct cpu_vec_costs* vec_costs;
};
extern const struct tune_params *current_tune;
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index d7877b0..677d6da 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -268,6 +268,16 @@ static int arm_cortex_a5_branch_cost (bool, bool);
static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
const unsigned char *sel);
+
+static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype,
+ int misalign ATTRIBUTE_UNUSED);
+static unsigned arm_add_stmt_cost (void *data, int count,
+ enum vect_cost_for_stmt kind,
+ struct _stmt_vec_info *stmt_info,
+ int misalign,
+ enum vect_cost_model_location where);
+
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
bool op0_preserve_value);
@@ -629,6 +639,12 @@ static const struct attribute_spec arm_attribute_table[] =
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
arm_vectorize_vec_perm_const_ok
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+ arm_builtin_vectorization_cost
+#undef TARGET_VECTORIZE_ADD_STMT_COST
+#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
+
#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
arm_canonicalize_comparison
@@ -891,6 +907,23 @@ struct processors
l1_size, \
l1_line_size
+/* ARM generic vectorizer costs. */
+static const
+struct cpu_vec_costs arm_default_vec_cost = {
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 1, /* vec_unalign_load_cost. */
+ 1, /* vec_unalign_store_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
const struct tune_params arm_slowmul_tune =
{
arm_slowmul_rtx_costs,
@@ -902,6 +935,7 @@ const struct tune_params arm_slowmul_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_fastmul_tune =
@@ -915,6 +949,7 @@ const struct tune_params arm_fastmul_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
/* StrongARM has early execution of branches, so a sequence that is worth
@@ -931,6 +966,7 @@ const struct tune_params arm_strongarm_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_xscale_tune =
@@ -944,6 +980,7 @@ const struct tune_params arm_xscale_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_9e_tune =
@@ -957,6 +994,7 @@ const struct tune_params arm_9e_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_v6t2_tune =
@@ -970,6 +1008,7 @@ const struct tune_params arm_v6t2_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
@@ -984,6 +1023,7 @@ const struct tune_params arm_cortex_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_cortex_a15_tune =
@@ -997,6 +1037,7 @@ const struct tune_params arm_cortex_a15_tune =
arm_default_branch_cost,
true, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -1013,6 +1054,7 @@ const struct tune_params arm_cortex_a5_tune =
arm_cortex_a5_branch_cost,
false, /* Prefer LDRD/STRD. */
{false, false}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_cortex_a9_tune =
@@ -1026,6 +1068,7 @@ const struct tune_params arm_cortex_a9_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
@@ -1041,6 +1084,7 @@ const struct tune_params arm_v6m_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{false, false}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_fa726te_tune =
@@ -1054,6 +1098,7 @@ const struct tune_params arm_fa726te_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
@@ -8696,6 +8741,94 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
}
}
+/* Vectorizer cost model implementation. */
+
+/* Implement targetm.vectorize.builtin_vectorization_cost. Return the cost of TYPE_OF_COST taken from current_tune->vec_costs. */
+static int
+arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype,
+ int misalign ATTRIBUTE_UNUSED)
+{
+ unsigned elements;
+
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ return current_tune->vec_costs->scalar_stmt_cost;
+
+ case scalar_load:
+ return current_tune->vec_costs->scalar_load_cost;
+
+ case scalar_store:
+ return current_tune->vec_costs->scalar_store_cost;
+
+ case vector_stmt:
+ return current_tune->vec_costs->vec_stmt_cost;
+
+ case vector_load:
+ return current_tune->vec_costs->vec_align_load_cost;
+
+ case vector_store:
+ return current_tune->vec_costs->vec_store_cost;
+
+ case vec_to_scalar:
+ return current_tune->vec_costs->vec_to_scalar_cost;
+
+ case scalar_to_vec:
+ return current_tune->vec_costs->scalar_to_vec_cost;
+
+ case unaligned_load:
+ return current_tune->vec_costs->vec_unalign_load_cost;
+
+ case unaligned_store:
+ return current_tune->vec_costs->vec_unalign_store_cost;
+
+ case cond_branch_taken:
+ return current_tune->vec_costs->cond_taken_branch_cost;
+
+ case cond_branch_not_taken:
+ return current_tune->vec_costs->cond_not_taken_branch_cost;
+
+ case vec_perm:
+ case vec_promote_demote:
+ return current_tune->vec_costs->vec_stmt_cost;
+
+ case vec_construct: /* Not tabulated: computed from the subpart count of VECTYPE. */
+ elements = TYPE_VECTOR_SUBPARTS (vectype);
+ return elements / 2 + 1;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Implement targetm.vectorize.add_stmt_cost. Add the cost of COUNT statements of KIND to slot WHERE of the cost array DATA and return the amount added (0 if flag_vect_cost_model is unset). */
+
+static unsigned
+arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
+ struct _stmt_vec_info *stmt_info, int misalign,
+ enum vect_cost_model_location where)
+{
+ unsigned *cost = (unsigned *) data;
+ unsigned retval = 0;
+
+ if (flag_vect_cost_model)
+ {
+ tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+ int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
+
+ /* Statements in an inner loop relative to the loop being
+ vectorized are weighted more heavily. The value here is
+ arbitrary and could potentially be improved with analysis. */
+ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
+ count *= 50; /* FIXME. */
+
+ retval = (unsigned) (count * stmt_cost);
+ cost[where] += retval;
+ }
+
+ return retval;
+}
/* Return true if and only if this insn can dual-issue only as older. */
static bool