diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2023-05-22 16:31:41 +0200 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2023-05-22 16:32:32 +0200 |
commit | 11ef53f0e8fa7408eaedf6d9fc781fd6faa1e5f6 (patch) | |
tree | f3219fc995a3afb2f18de1e42f5e3d2b392f2edf /gcc | |
parent | a8900fe4812f6b77d1349308bb4de08f7c130225 (diff) | |
download | gcc-11ef53f0e8fa7408eaedf6d9fc781fd6faa1e5f6.zip gcc-11ef53f0e8fa7408eaedf6d9fc781fd6faa1e5f6.tar.gz gcc-11ef53f0e8fa7408eaedf6d9fc781fd6faa1e5f6.tar.bz2 |
i386: Account for the memory read in V*QImode multiplication sequences
Add the cost of a memory read to the cost of V*QImode vector multiplication sequences.
gcc/ChangeLog:
* config/i386/i386.cc (ix86_multiplication_cost): Add
the cost of a memory read to the cost of V?QImode sequences.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386.cc | 31 |
1 files changed, 23 insertions, 8 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 6a4b332..a36e625 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20463,27 +20463,42 @@ ix86_multiplication_cost (const struct processor_costs *cost,
       {
       case V4QImode:
       case V8QImode:
-        /* Partial V*QImode is emulated with 4-5 insns. */
-        if ((TARGET_AVX512BW && TARGET_AVX512VL) || TARGET_XOP)
+        /* Partial V*QImode is emulated with 4-6 insns. */
+        if (TARGET_AVX512BW && TARGET_AVX512VL)
           return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
+        else if (TARGET_AVX2)
+          return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 5);
+        else if (TARGET_XOP)
+          return (ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3)
+                  + cost->sse_load[2]);
         else
-          return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 4);
+          return (ix86_vec_cost (mode, cost->mulss + cost->sse_op * 4)
+                  + cost->sse_load[2]);
 
       case V16QImode:
         /* V*QImode is emulated with 4-11 insns. */
         if (TARGET_AVX512BW && TARGET_AVX512VL)
           return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
+        else if (TARGET_AVX2)
+          return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 8);
         else if (TARGET_XOP)
-          return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
-        /* FALLTHRU */
+          return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5)
+                  + cost->sse_load[2]);
+        else
+          return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7)
+                  + cost->sse_load[2]);
+
       case V32QImode:
-        if (TARGET_AVX512BW && mode == V32QImode)
+        if (TARGET_AVX512BW)
           return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
         else
-          return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7);
+          return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7)
+                  + cost->sse_load[3] * 2);
 
       case V64QImode:
-        return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 9);
+        return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 9)
+                + cost->sse_load[3] * 2
+                + cost->sse_load[4] * 2);
 
       case V4SImode:
         /* pmulld is used in this case. No emulation is needed. */