From c45ef5ee8da4de239bf7f5b66a45f7e6e797f954 Mon Sep 17 00:00:00 2001
From: Victor Do Nascimento
Date: Tue, 4 Jun 2024 17:11:07 +0100
Subject: i386: Fix dot_prod backend patterns for mmx and sse targets

Following the migration of the dot_prod optab from a direct to a
conversion-type optab, ensure all back-end patterns incorporate the
second machine mode into pattern names.

gcc/ChangeLog:

	* config/i386/mmx.md (usdot_prodv8qi): Renamed to...
	(usdot_prodv2siv8qi): ...this.
	(sdot_prodv8qi): Renamed to...
	(sdot_prodv2siv8qi): ...this.
	(udot_prodv8qi): Renamed to...
	(udot_prodv2siv8qi): ...this.
	(usdot_prodv4hi): Renamed to...
	(usdot_prodv2siv4hi): ...this.
	(udot_prodv4hi): Renamed to...
	(udot_prodv2siv4hi): ...this.
	(sdot_prodv4hi): Renamed to...
	(sdot_prodv2siv4hi): ...this.
	* config/i386/sse.md (sdot_prod<mode>): Renamed to...
	(sdot_prod<sseunpackmodelower><mode>): ...this.
	(sdot_prodv4si): Renamed to...
	(sdot_prodv2div4si): ...this.
	(usdot_prod<mode>): Renamed to...
	(usdot_prod<ssedvecmodelower><mode>): ...this.
	(sdot_prod<mode>): Renamed to...
	(sdot_prod<ssedvecmodelower><mode>): ...this.
	(sdot_prodv64qi): Renamed to...
	(sdot_prodv16siv64qi): ...this.
	(udot_prod<mode>): Renamed to...
	(udot_prod<ssedvecmodelower><mode>): ...this.
	(udot_prodv64qi): Renamed to...
	(udot_prodv16siv64qi): ...this.
	(usdot_prod<mode>): Renamed to...
	(usdot_prod<sseunpackmodelower><mode>): ...this.
	(udot_prod<mode>): Renamed to...
	(udot_prod<sseunpackmodelower><mode>): ...this.
--- gcc/config/i386/mmx.md | 30 +++++++++++++++--------------- gcc/config/i386/sse.md | 41 ++++++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 34 deletions(-) (limited to 'gcc') diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index ca768b9..9d2a82c 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -6504,7 +6504,7 @@ DONE; }) -(define_expand "usdot_prodv8qi" +(define_expand "usdot_prodv2siv8qi" [(match_operand:V2SI 0 "register_operand") (match_operand:V8QI 1 "register_operand") (match_operand:V8QI 2 "register_operand") @@ -6523,7 +6523,7 @@ rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); rtx op0 = gen_reg_rtx (V4SImode); - emit_insn (gen_usdot_prodv16qi (op0, op1, op2, op3)); + emit_insn (gen_usdot_prodv4siv16qi (op0, op1, op2, op3)); emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); } else @@ -6537,7 +6537,7 @@ emit_move_insn (op3, CONST0_RTX (V4SImode)); emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1])); emit_insn (gen_extendv8qiv8hi2 (op2, operands[2])); - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); /* vec_perm (op0, 2, 3, 0, 1); */ emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@ -6548,7 +6548,7 @@ DONE; }) -(define_expand "sdot_prodv8qi" +(define_expand "sdot_prodv2siv8qi" [(match_operand:V2SI 0 "register_operand") (match_operand:V8QI 1 "register_operand") (match_operand:V8QI 2 "register_operand") @@ -6566,7 +6566,7 @@ rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); rtx op0 = gen_reg_rtx (V4SImode); - emit_insn (gen_sdot_prodv16qi (op0, op1, op2, op3)); + emit_insn (gen_sdot_prodv4siv16qi (op0, op1, op2, op3)); emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); } else @@ -6580,7 +6580,7 @@ emit_move_insn (op3, CONST0_RTX (V4SImode)); emit_insn (gen_extendv8qiv8hi2 (op1, operands[1])); emit_insn (gen_extendv8qiv8hi2 (op2, operands[2])); - emit_insn 
(gen_sdot_prodv8hi (op0, op1, op2, op3)); + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); /* vec_perm (op0, 2, 3, 0, 1); */ emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@ -6592,7 +6592,7 @@ }) -(define_expand "udot_prodv8qi" +(define_expand "udot_prodv2siv8qi" [(match_operand:V2SI 0 "register_operand") (match_operand:V8QI 1 "register_operand") (match_operand:V8QI 2 "register_operand") @@ -6610,7 +6610,7 @@ rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); rtx op0 = gen_reg_rtx (V4SImode); - emit_insn (gen_udot_prodv16qi (op0, op1, op2, op3)); + emit_insn (gen_udot_prodv4siv16qi (op0, op1, op2, op3)); emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); } else @@ -6624,7 +6624,7 @@ emit_move_insn (op3, CONST0_RTX (V4SImode)); emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1])); emit_insn (gen_zero_extendv8qiv8hi2 (op2, operands[2])); - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); /* vec_perm (op0, 2, 3, 0, 1); */ emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@ -6636,7 +6636,7 @@ }) -(define_expand "usdot_prodv4hi" +(define_expand "usdot_prodv2siv4hi" [(match_operand:V2SI 0 "register_operand") (match_operand:V4HI 1 "register_operand") (match_operand:V4HI 2 "register_operand") @@ -6652,12 +6652,12 @@ rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); rtx op0 = gen_reg_rtx (V4SImode); - emit_insn (gen_usdot_prodv8hi (op0, op1, op2, op3)); + emit_insn (gen_usdot_prodv4siv8hi (op0, op1, op2, op3)); emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); DONE; }) -(define_expand "udot_prodv4hi" +(define_expand "udot_prodv2siv4hi" [(match_operand:V2SI 0 "register_operand") (match_operand:V4HI 1 "register_operand") (match_operand:V4HI 2 "register_operand") @@ -6673,12 +6673,12 @@ rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); rtx op0 = gen_reg_rtx (V4SImode); - emit_insn (gen_udot_prodv8hi (op0, op1, op2, 
op3)); + emit_insn (gen_udot_prodv4siv8hi (op0, op1, op2, op3)); emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); DONE; }) -(define_expand "sdot_prodv4hi" +(define_expand "sdot_prodv2siv4hi" [(match_operand:V2SI 0 "register_operand") (match_operand:V4HI 1 "register_operand") (match_operand:V4HI 2 "register_operand") @@ -6694,7 +6694,7 @@ rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode); rtx op0 = gen_reg_rtx (V4SImode); - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3)); + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3)); emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); DONE; }) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 29b0ea3..183c195 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -16975,7 +16975,7 @@ (define_mode_attr SDOT_VPDP_SUF [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")]) -(define_expand "sdot_prod<mode>" +(define_expand "sdot_prod<sseunpackmodelower><mode>" [(match_operand:<sseunpackmode> 0 "register_operand") (match_operand:VI2_AVX512VNNIBW 1 "register_operand") (match_operand:VI2_AVX512VNNIBW 2 "register_operand") @@ -17010,7 +17010,7 @@ ;; Normally we use widen_mul_even/odd, but combine can't quite get it all ;; back together when madd is available. 
-(define_expand "sdot_prodv4si" +(define_expand "sdot_prodv2div4si" [(match_operand:V2DI 0 "register_operand") (match_operand:V4SI 1 "register_operand") (match_operand:V4SI 2 "register_operand") @@ -30471,7 +30471,7 @@ [(set_attr ("prefix") ("evex")) (set_attr "mode" "")]) -(define_expand "usdot_prod<mode>" +(define_expand "usdot_prod<ssedvecmodelower><mode>" [(match_operand:<ssedvecmode> 0 "register_operand") (match_operand:VI1_AVX512 1 "register_operand") (match_operand:VI1_AVX512 2 "register_operand") @@ -30509,10 +30509,11 @@ rtx sum = gen_reg_rtx (<ssedvecmode>mode); emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, - op2_lo, sum)); - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, - op2_hi, operands[3])); + emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res1, + op1_lo, op2_lo, sum)); + emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res2, + op1_hi, op2_hi, + operands[3])); emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); } DONE; @@ -31336,7 +31337,7 @@ (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds") (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")]) -(define_expand "sdot_prod<mode>" +(define_expand "sdot_prod<ssedvecmodelower><mode>" [(match_operand:<ssedvecmode> 0 "register_operand") (match_operand:VI1_AVX512VNNIBW 1 "register_operand") (match_operand:VI1_AVX512VNNIBW 2 "register_operand") @@ -31373,17 +31374,18 @@ rtx sum = gen_reg_rtx (<ssedvecmode>mode); emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, - op2_lo, sum)); - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, - op2_hi, operands[3])); + emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res1, + op1_lo, op2_lo, sum)); + emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res2, + op1_hi, op2_hi, + operands[3])); emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); } DONE; }) -(define_expand "udot_prod<mode>" +(define_expand "udot_prod<ssedvecmodelower><mode>" [(match_operand:<ssedvecmode> 0 "register_operand") (match_operand:VI1_AVX512VNNIBW 1 "register_operand") (match_operand:VI1_AVX512VNNIBW 2 "register_operand") @@ -31420,10 +31422,11 @@ rtx sum = gen_reg_rtx (<ssedvecmode>mode); emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode)); - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo, - op2_lo, sum)); - emit_insn 
(gen_sdot_prod<sseunpackmodelower> (res2, op1_hi, - op2_hi, operands[3])); + emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res1, + op1_lo, op2_lo, sum)); + emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res2, + op1_hi, op2_hi, + operands[3])); emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2)); } @@ -31803,7 +31806,7 @@ (UNSPEC_VPDPWSUD "wsud") (UNSPEC_VPDPWSUDS "wsuds") (UNSPEC_VPDPWUUD "wuud") (UNSPEC_VPDPWUUDS "wuuds")]) -(define_expand "usdot_prod<mode>" +(define_expand "usdot_prod<sseunpackmodelower><mode>" [(match_operand:<sseunpackmode> 0 "register_operand") (match_operand:VI2_AVX10_2 1 "register_operand") (match_operand:VI2_AVX10_2 2 "register_operand") @@ -31821,7 +31824,7 @@ DONE; }) -(define_expand "udot_prod<mode>" +(define_expand "udot_prod<sseunpackmodelower><mode>" [(match_operand:<sseunpackmode> 0 "register_operand") (match_operand:VI2_AVX10_2 1 "register_operand") (match_operand:VI2_AVX10_2 2 "register_operand") -- cgit v1.1