aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com>, 2023-05-22 16:31:41 +0200
committer: Uros Bizjak <ubizjak@gmail.com>, 2023-05-22 16:32:32 +0200
commit: 11ef53f0e8fa7408eaedf6d9fc781fd6faa1e5f6 (patch)
tree: f3219fc995a3afb2f18de1e42f5e3d2b392f2edf /gcc
parent: a8900fe4812f6b77d1349308bb4de08f7c130225 (diff)
download: gcc-11ef53f0e8fa7408eaedf6d9fc781fd6faa1e5f6.zip
gcc-11ef53f0e8fa7408eaedf6d9fc781fd6faa1e5f6.tar.gz
gcc-11ef53f0e8fa7408eaedf6d9fc781fd6faa1e5f6.tar.bz2
i386: Account for the memory read in V*QImode multiplication sequences
Add the cost of a memory read to the cost of V*QImode vector mult sequences.

gcc/ChangeLog:

	* config/i386/i386.cc (ix86_multiplication_cost): Add the cost of a memory read to the cost of V?QImode sequences.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/i386.cc | 31
1 files changed, 23 insertions, 8 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 6a4b332..a36e625 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20463,27 +20463,42 @@ ix86_multiplication_cost (const struct processor_costs *cost,
{
case V4QImode:
case V8QImode:
- /* Partial V*QImode is emulated with 4-5 insns. */
- if ((TARGET_AVX512BW && TARGET_AVX512VL) || TARGET_XOP)
+ /* Partial V*QImode is emulated with 4-6 insns. */
+ if (TARGET_AVX512BW && TARGET_AVX512VL)
return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
+ else if (TARGET_AVX2)
+ return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 5);
+ else if (TARGET_XOP)
+ return (ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3)
+ + cost->sse_load[2]);
else
- return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 4);
+ return (ix86_vec_cost (mode, cost->mulss + cost->sse_op * 4)
+ + cost->sse_load[2]);
case V16QImode:
/* V*QImode is emulated with 4-11 insns. */
if (TARGET_AVX512BW && TARGET_AVX512VL)
return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
+ else if (TARGET_AVX2)
+ return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 8);
else if (TARGET_XOP)
- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
- /* FALLTHRU */
+ return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5)
+ + cost->sse_load[2]);
+ else
+ return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7)
+ + cost->sse_load[2]);
+
case V32QImode:
- if (TARGET_AVX512BW && mode == V32QImode)
+ if (TARGET_AVX512BW)
return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
else
- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7);
+ return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7)
+ + cost->sse_load[3] * 2);
case V64QImode:
- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 9);
+ return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 9)
+ + cost->sse_load[3] * 2
+ + cost->sse_load[4] * 2);
case V4SImode:
/* pmulld is used in this case. No emulation is needed. */