aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Hubicka <hubicka@ucw.cz>2025-04-21 20:16:50 +0200
committerJan Hubicka <hubicka@ucw.cz>2025-04-21 20:17:15 +0200
commit0907a810f586b07636cc5b83dba6025eb5240655 (patch)
tree45e3dcb095ea63dbc2cddf4274fe230f34ac37ff
parent2d07e3687998f31dfc7a18cbf14f17096c599549 (diff)
downloadgcc-0907a810f586b07636cc5b83dba6025eb5240655.zip
gcc-0907a810f586b07636cc5b83dba6025eb5240655.tar.gz
gcc-0907a810f586b07636cc5b83dba6025eb5240655.tar.bz2
Fix cost of vectorized double->float conversion
In previous patch I miscomputed costs of cvtpd2pf instruction which mistakely gets accounted as 2 (VEC_PACK_TRUNC_EXPR). Vectorizer can produce both, but when producing VEC_PACK_TRUNC_EXPR it use promote_demote patch. This patch thus simplifies handling of NOP_EXPR since in that case we should always be producing only one instruction. PR target/119879 * config/i386/i386.cc (fp_conversion_stmt_cost): Inline to ... (ix86_vector_costs::add_stmt_cost): ... here; fix handling of NOP_EXPR.
-rw-r--r--gcc/config/i386/i386.cc51
1 files changed, 22 insertions, 29 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 28603c2..d15f91d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -25257,32 +25257,6 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
return new ix86_vector_costs (vinfo, costing_for_scalar);
}
-/* Return cost of statement doing FP conversion. */
-
-static unsigned
-fp_conversion_stmt_cost (machine_mode mode, gimple *stmt, bool scalar_p)
-{
- int outer_size
- = tree_to_uhwi
- (TYPE_SIZE
- (TREE_TYPE (gimple_assign_lhs (stmt))));
- int inner_size
- = tree_to_uhwi
- (TYPE_SIZE
- (TREE_TYPE (gimple_assign_rhs1 (stmt))));
- int stmt_cost = vec_fp_conversion_cost
- (ix86_tune_cost, GET_MODE_BITSIZE (mode));
- /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end
- up doing two conversions and packing them. */
- if (!scalar_p && inner_size > outer_size)
- {
- int n = inner_size / outer_size;
- stmt_cost = stmt_cost * n
- + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
- }
- return stmt_cost;
-}
-
unsigned
ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree node,
@@ -25394,8 +25368,8 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
stmt_cost = 0;
else if (fp)
- stmt_cost = fp_conversion_stmt_cost (mode, stmt_info->stmt,
- scalar_p);
+ stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
break;
case BIT_IOR_EXPR:
@@ -25439,7 +25413,26 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
if (kind == vec_promote_demote
&& fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
- stmt_cost = fp_conversion_stmt_cost (mode, stmt_info->stmt, scalar_p);
+ {
+ int outer_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
+ int inner_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
+ int stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end
+ up doing two conversions and packing them. */
+ if (inner_size > outer_size)
+ {
+ int n = inner_size / outer_size;
+ stmt_cost = stmt_cost * n
+ + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ }
/* If we do elementwise loads into a vector then we are bound by
latency and execution resources for the many scalar loads