aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author liuhongt <hongtao.liu@intel.com> 2024-01-08 15:13:41 +0800
committer liuhongt <hongtao.liu@intel.com> 2024-05-07 15:44:35 +0800
commit 8b974f54393ab2d2d16a0051a68c155455a92aad (patch)
tree 9415473db2b461b4517ee4d2c67db9279ce4d8b3
parent fa911365490a7ca308878517a4af6189ffba7ed6 (diff)
downloadgcc-8b974f54393ab2d2d16a0051a68c155455a92aad.zip
gcc-8b974f54393ab2d2d16a0051a68c155455a92aad.tar.gz
gcc-8b974f54393ab2d2d16a0051a68c155455a92aad.tar.bz2
Extend usdot_prodv*qi with vpmaddwd when AVXVNNI/AVX512VNNI is not available.
gcc/ChangeLog: * config/i386/sse.md (usdot_prodv*qi): Extend to VI1_AVX512 with vpmaddwd when avxvnni/avx512vnni is not available.
-rw-r--r-- gcc/config/i386/sse.md 55
1 files changed, 41 insertions, 14 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1bf5072..f57f36a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -29955,21 +29955,48 @@
(define_expand "usdot_prod<mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
- (match_operand:VI1_AVX512VNNI 1 "register_operand")
- (match_operand:VI1_AVX512VNNI 2 "register_operand")
+ (match_operand:VI1_AVX512 1 "register_operand")
+ (match_operand:VI1_AVX512 2 "register_operand")
(match_operand:<ssedvecmode> 3 "register_operand")]
- "((<MODE_SIZE> == 64 && TARGET_EVEX512)
- || ((TARGET_AVX512VNNI && TARGET_AVX512VL)
- || TARGET_AVXVNNI))"
-{
- operands[1] = lowpart_subreg (<ssedvecmode>mode,
- force_reg (<MODE>mode, operands[1]),
- <MODE>mode);
- operands[2] = lowpart_subreg (<ssedvecmode>mode,
- force_reg (<MODE>mode, operands[2]),
- <MODE>mode);
- emit_insn (gen_vpdpbusd_<ssedvecmodelower> (operands[0], operands[3],
- operands[1], operands[2]));
+ "TARGET_SSE2"
+{
+ if (<MODE_SIZE> == 64
+ ? TARGET_AVX512VNNI
+ : ((TARGET_AVX512VNNI && TARGET_AVX512VL) || TARGET_AVXVNNI))
+ {
+ operands[1] = lowpart_subreg (<ssedvecmode>mode,
+ force_reg (<MODE>mode, operands[1]),
+ <MODE>mode);
+ operands[2] = lowpart_subreg (<ssedvecmode>mode,
+ force_reg (<MODE>mode, operands[2]),
+ <MODE>mode);
+ emit_insn (gen_vpdpbusd_<ssedvecmodelower> (operands[0], operands[3],
+ operands[1], operands[2]));
+ }
+ else
+ {
+ /* No VNNI: unpack operands into lo/hi halves and emulate with sdot_prod (vpmaddwd, per the ChangeLog). */
+ rtx op1_lo = gen_reg_rtx (<sseunpackmode>mode);
+ rtx op1_hi = gen_reg_rtx (<sseunpackmode>mode);
+ rtx op2_lo = gen_reg_rtx (<sseunpackmode>mode);
+ rtx op2_hi = gen_reg_rtx (<sseunpackmode>mode);
+
+ emit_insn (gen_vec_unpacku_lo_<mode> (op1_lo, operands[1]));
+ emit_insn (gen_vec_unpacks_lo_<mode> (op2_lo, operands[2]));
+ emit_insn (gen_vec_unpacku_hi_<mode> (op1_hi, operands[1]));
+ emit_insn (gen_vec_unpacks_hi_<mode> (op2_hi, operands[2]));
+
+ rtx res1 = gen_reg_rtx (<ssedvecmode>mode);
+ rtx res2 = gen_reg_rtx (<ssedvecmode>mode);
+ rtx sum = gen_reg_rtx (<ssedvecmode>mode);
+
+ emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
+ emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
+ op2_lo, sum));
+ emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
+ op2_hi, operands[3]));
+ emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
+ }
DONE;
})