diff options
author | liuhongt <hongtao.liu@intel.com> | 2024-01-08 15:13:41 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2024-05-07 15:44:35 +0800 |
commit | 8b974f54393ab2d2d16a0051a68c155455a92aad (patch) | |
tree | 9415473db2b461b4517ee4d2c67db9279ce4d8b3 | |
parent | fa911365490a7ca308878517a4af6189ffba7ed6 (diff) | |
download | gcc-8b974f54393ab2d2d16a0051a68c155455a92aad.zip gcc-8b974f54393ab2d2d16a0051a68c155455a92aad.tar.gz gcc-8b974f54393ab2d2d16a0051a68c155455a92aad.tar.bz2 |
Extend usdot_prodv*qi with vpmaddwd when AVXVNNI/AVX512VNNI is not available.
gcc/ChangeLog:
* config/i386/sse.md (usdot_prodv*qi): Extend to VI1_AVX512
with vpmaddwd when avxvnni/avx512vnni is not available.
-rw-r--r-- | gcc/config/i386/sse.md | 55 |
1 files changed, 41 insertions, 14 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1bf5072..f57f36a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -29955,21 +29955,48 @@
 
 (define_expand "usdot_prod<mode>"
   [(match_operand:<ssedvecmode> 0 "register_operand")
-   (match_operand:VI1_AVX512VNNI 1 "register_operand")
-   (match_operand:VI1_AVX512VNNI 2 "register_operand")
+   (match_operand:VI1_AVX512 1 "register_operand")
+   (match_operand:VI1_AVX512 2 "register_operand")
    (match_operand:<ssedvecmode> 3 "register_operand")]
-  "((<MODE_SIZE> == 64 && TARGET_EVEX512)
-    || ((TARGET_AVX512VNNI && TARGET_AVX512VL)
-        || TARGET_AVXVNNI))"
-{
-  operands[1] = lowpart_subreg (<ssedvecmode>mode,
-                                force_reg (<MODE>mode, operands[1]),
-                                <MODE>mode);
-  operands[2] = lowpart_subreg (<ssedvecmode>mode,
-                                force_reg (<MODE>mode, operands[2]),
-                                <MODE>mode);
-  emit_insn (gen_vpdpbusd_<ssedvecmodelower> (operands[0], operands[3],
-                                              operands[1], operands[2]));
+  "TARGET_SSE2"
+{
+  if (<MODE_SIZE> == 64
+     ? TARGET_AVX512VNNI
+     : ((TARGET_AVX512VNNI && TARGET_AVX512VL) || TARGET_AVXVNNI))
+    {
+      operands[1] = lowpart_subreg (<ssedvecmode>mode,
+                                    force_reg (<MODE>mode, operands[1]),
+                                    <MODE>mode);
+      operands[2] = lowpart_subreg (<ssedvecmode>mode,
+                                    force_reg (<MODE>mode, operands[2]),
+                                    <MODE>mode);
+      emit_insn (gen_vpdpbusd_<ssedvecmodelower> (operands[0], operands[3],
+                                                  operands[1], operands[2]));
+    }
+  else
+    {
+      /* Emulate with vpdpwssd.  */
+      rtx op1_lo = gen_reg_rtx (<sseunpackmode>mode);
+      rtx op1_hi = gen_reg_rtx (<sseunpackmode>mode);
+      rtx op2_lo = gen_reg_rtx (<sseunpackmode>mode);
+      rtx op2_hi = gen_reg_rtx (<sseunpackmode>mode);
+
+      emit_insn (gen_vec_unpacku_lo_<mode> (op1_lo, operands[1]));
+      emit_insn (gen_vec_unpacks_lo_<mode> (op2_lo, operands[2]));
+      emit_insn (gen_vec_unpacku_hi_<mode> (op1_hi, operands[1]));
+      emit_insn (gen_vec_unpacks_hi_<mode> (op2_hi, operands[2]));
+
+      rtx res1 = gen_reg_rtx (<ssedvecmode>mode);
+      rtx res2 = gen_reg_rtx (<ssedvecmode>mode);
+      rtx sum = gen_reg_rtx (<ssedvecmode>mode);
+
+      emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
+      emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
+                                                    op2_lo, sum));
+      emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
+                                                    op2_hi, operands[3]));
+      emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
+    }
   DONE;
 })
 