aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
author Haochen Jiang <haochen.jiang@intel.com> 2024-09-02 10:24:29 +0800
committer Haochen Jiang <haochen.jiang@intel.com> 2024-09-02 10:24:29 +0800
commit b1f9fbb6da1a3ced57c3668cecc9f9449e1b237e (patch)
tree 16a58b388fc5c081a659b9b22f34bbf5cb0304f9 /gcc/config
parent 5239902210a16b22d59d2cf8b535d615922a5c00 (diff)
download gcc-b1f9fbb6da1a3ced57c3668cecc9f9449e1b237e.zip
gcc-b1f9fbb6da1a3ced57c3668cecc9f9449e1b237e.tar.gz
gcc-b1f9fbb6da1a3ced57c3668cecc9f9449e1b237e.tar.bz2
i386: Auto vectorize sdot_prod, usdot_prod, udot_prod with AVX10.2 instructions
gcc/ChangeLog:

	* config/i386/sse.md (VI1_AVX512VNNIBW): New.
	(VI2_AVX10_2): Ditto.
	(sdot_prod<mode>): Add AVX10.2 to auto vectorize and combine
	512 bit part.
	(udot_prod<mode>): Ditto.
	(sdot_prodv64qi): Removed.
	(udot_prodv64qi): Ditto.
	(usdot_prod<mode>): Add AVX10.2 to auto vectorize.
	(udot_prod<mode>): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/vnniint16-auto-vectorize-2.c: Only define
	TEST when not defined.
	* gcc.target/i386/vnniint8-auto-vectorize-2.c: Ditto.
	* gcc.target/i386/vnniint16-auto-vectorize-3.c: New test.
	* gcc.target/i386/vnniint16-auto-vectorize-4.c: Ditto.
	* gcc.target/i386/vnniint8-auto-vectorize-3.c: Ditto.
	* gcc.target/i386/vnniint8-auto-vectorize-4.c: Ditto.
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/i386/sse.md | 93
1 file changed, 21 insertions, 72 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index da91d39..442ac93 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -610,6 +610,10 @@
(define_mode_iterator VI1_AVX512VNNI
[(V64QI "TARGET_AVX512VNNI && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI])
+(define_mode_iterator VI1_AVX512VNNIBW
+ [(V64QI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
+ (V32QI "TARGET_AVX2") V16QI])
+
(define_mode_iterator VI12_256_512_AVX512VL
[(V64QI "TARGET_EVEX512") (V32QI "TARGET_AVX512VL")
(V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL")])
@@ -627,6 +631,9 @@
[(V32HI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
(V16HI "TARGET_AVX2") V8HI])
+(define_mode_iterator VI2_AVX10_2
+ [(V32HI "TARGET_AVX10_2_512") V16HI V8HI])
+
(define_mode_iterator VI4_AVX
[(V8SI "TARGET_AVX") V4SI])
@@ -31232,12 +31239,13 @@
(define_expand "sdot_prod<mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
- (match_operand:VI1_AVX2 1 "register_operand")
- (match_operand:VI1_AVX2 2 "register_operand")
+ (match_operand:VI1_AVX512VNNIBW 1 "register_operand")
+ (match_operand:VI1_AVX512VNNIBW 2 "register_operand")
(match_operand:<ssedvecmode> 3 "register_operand")]
"TARGET_SSE2"
{
- if (TARGET_AVXVNNIINT8)
+ if ((<MODE_SIZE> == 64 && TARGET_AVX10_2_512)
+ || (<MODE_SIZE> < 64 && (TARGET_AVXVNNIINT8 || TARGET_AVX10_2_256)))
{
operands[1] = lowpart_subreg (<ssedvecmode>mode,
force_reg (<MODE>mode, operands[1]),
@@ -31276,44 +31284,15 @@
DONE;
})
-(define_expand "sdot_prodv64qi"
- [(match_operand:V16SI 0 "register_operand")
- (match_operand:V64QI 1 "register_operand")
- (match_operand:V64QI 2 "register_operand")
- (match_operand:V16SI 3 "register_operand")]
- "(TARGET_AVX512VNNI || TARGET_AVX512BW) && TARGET_EVEX512"
-{
- /* Emulate with vpdpwssd. */
- rtx op1_lo = gen_reg_rtx (V32HImode);
- rtx op1_hi = gen_reg_rtx (V32HImode);
- rtx op2_lo = gen_reg_rtx (V32HImode);
- rtx op2_hi = gen_reg_rtx (V32HImode);
-
- emit_insn (gen_vec_unpacks_lo_v64qi (op1_lo, operands[1]));
- emit_insn (gen_vec_unpacks_lo_v64qi (op2_lo, operands[2]));
- emit_insn (gen_vec_unpacks_hi_v64qi (op1_hi, operands[1]));
- emit_insn (gen_vec_unpacks_hi_v64qi (op2_hi, operands[2]));
-
- rtx res1 = gen_reg_rtx (V16SImode);
- rtx res2 = gen_reg_rtx (V16SImode);
- rtx sum = gen_reg_rtx (V16SImode);
-
- emit_move_insn (sum, CONST0_RTX (V16SImode));
- emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
- emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
-
- emit_insn (gen_addv16si3 (operands[0], res1, res2));
- DONE;
-})
-
(define_expand "udot_prod<mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
- (match_operand:VI1_AVX2 1 "register_operand")
- (match_operand:VI1_AVX2 2 "register_operand")
+ (match_operand:VI1_AVX512VNNIBW 1 "register_operand")
+ (match_operand:VI1_AVX512VNNIBW 2 "register_operand")
(match_operand:<ssedvecmode> 3 "register_operand")]
"TARGET_SSE2"
{
- if (TARGET_AVXVNNIINT8)
+ if ((<MODE_SIZE> == 64 && TARGET_AVX10_2_512)
+ || (<MODE_SIZE> < 64 && (TARGET_AVXVNNIINT8 || TARGET_AVX10_2_256)))
{
operands[1] = lowpart_subreg (<ssedvecmode>mode,
force_reg (<MODE>mode, operands[1]),
@@ -31352,36 +31331,6 @@
DONE;
})
-(define_expand "udot_prodv64qi"
- [(match_operand:V16SI 0 "register_operand")
- (match_operand:V64QI 1 "register_operand")
- (match_operand:V64QI 2 "register_operand")
- (match_operand:V16SI 3 "register_operand")]
- "(TARGET_AVX512VNNI || TARGET_AVX512BW) && TARGET_EVEX512"
-{
- /* Emulate with vpdpwssd. */
- rtx op1_lo = gen_reg_rtx (V32HImode);
- rtx op1_hi = gen_reg_rtx (V32HImode);
- rtx op2_lo = gen_reg_rtx (V32HImode);
- rtx op2_hi = gen_reg_rtx (V32HImode);
-
- emit_insn (gen_vec_unpacku_lo_v64qi (op1_lo, operands[1]));
- emit_insn (gen_vec_unpacku_lo_v64qi (op2_lo, operands[2]));
- emit_insn (gen_vec_unpacku_hi_v64qi (op1_hi, operands[1]));
- emit_insn (gen_vec_unpacku_hi_v64qi (op2_hi, operands[2]));
-
- rtx res1 = gen_reg_rtx (V16SImode);
- rtx res2 = gen_reg_rtx (V16SImode);
- rtx sum = gen_reg_rtx (V16SImode);
-
- emit_move_insn (sum, CONST0_RTX (V16SImode));
- emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
- emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
-
- emit_insn (gen_addv16si3 (operands[0], res1, res2));
- DONE;
-})
-
(define_insn "vpdp<vpdotprodtype>_<mode>"
[(set (match_operand:VI4_AVX 0 "register_operand" "=v")
(unspec:VI4_AVX
@@ -31757,10 +31706,10 @@
(define_expand "usdot_prod<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX2 1 "register_operand")
- (match_operand:VI2_AVX2 2 "register_operand")
+ (match_operand:VI2_AVX10_2 1 "register_operand")
+ (match_operand:VI2_AVX10_2 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
- "TARGET_AVXVNNIINT16"
+ "TARGET_AVXVNNIINT16 || TARGET_AVX10_2_256"
{
operands[1] = lowpart_subreg (<sseunpackmode>mode,
force_reg (<MODE>mode, operands[1]),
@@ -31775,10 +31724,10 @@
(define_expand "udot_prod<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX2 1 "register_operand")
- (match_operand:VI2_AVX2 2 "register_operand")
+ (match_operand:VI2_AVX10_2 1 "register_operand")
+ (match_operand:VI2_AVX10_2 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
- "TARGET_AVXVNNIINT16"
+ "TARGET_AVXVNNIINT16 || TARGET_AVX10_2_256"
{
operands[1] = lowpart_subreg (<sseunpackmode>mode,
force_reg (<MODE>mode, operands[1]),