aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Richard Biener <rguenther@suse.de> 2022-03-11 11:51:13 +0100
committer: Richard Biener <rguenther@suse.de> 2022-03-11 14:11:59 +0100
commit: 69619acd8d9b5856f5af6e5323d9c7c4ec9ad08f (patch)
tree: c60870eeb11a2b981f82dfbb1742a734588b7c5d
parent: db494fd68d12f3b49ad5f4747c234f3a94d10c72 (diff)
download: gcc-69619acd8d9b5856f5af6e5323d9c7c4ec9ad08f.zip
gcc-69619acd8d9b5856f5af6e5323d9c7c4ec9ad08f.tar.gz
gcc-69619acd8d9b5856f5af6e5323d9c7c4ec9ad08f.tar.bz2
target/104762 - vectorization costs of CONSTRUCTORs

After accounting for the GPR -> XMM move cost for vec_construct, the base
cost needs adjustments to not double-cost those moves. This also lowers
the cost when such a move is not necessary.

2022-03-11  Richard Biener  <rguenther@suse.de>

	PR target/104762
	* config/i386/i386.cc (ix86_builtin_vectorization_cost): Do not
	cost the first lane of SSE pieces as inserts for vec_construct.

-rw-r--r--  gcc/config/i386/i386.cc  17
1 files changed, 11 insertions, 6 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4121f98..23bedea 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22597,16 +22597,21 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
case vec_construct:
{
- /* N element inserts into SSE vectors. */
- int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
+ int n = TYPE_VECTOR_SUBPARTS (vectype);
+ /* N - 1 element inserts into an SSE vector, the possible
+ GPR -> XMM move is accounted for in add_stmt_cost. */
+ if (GET_MODE_BITSIZE (mode) <= 128)
+ return (n - 1) * ix86_cost->sse_op;
/* One vinserti128 for combining two SSE vectors for AVX256. */
- if (GET_MODE_BITSIZE (mode) == 256)
- cost += ix86_vec_cost (mode, ix86_cost->addss);
+ else if (GET_MODE_BITSIZE (mode) == 256)
+ return ((n - 2) * ix86_cost->sse_op
+ + ix86_vec_cost (mode, ix86_cost->addss));
/* One vinserti64x4 and two vinserti128 for combining SSE
and AVX256 vectors to AVX512. */
else if (GET_MODE_BITSIZE (mode) == 512)
- cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
- return cost;
+ return ((n - 4) * ix86_cost->sse_op
+ + 3 * ix86_vec_cost (mode, ix86_cost->addss));
+ gcc_unreachable ();
}
default: