i386: Add vec_fm{addsub,subadd}v2sf4 patterns [PR116979]

As mentioned in the PR, the addition of the vec_addsubv2sf3 expander caused the testcase to be vectorized and to no longer use fma. The following patch adds new expanders so that it can be vectorized again with the alternating add/sub fma instructions. There is a bug on the SLP cost computation side which causes it not to count some scalar multiplication costs, but I think the patch is desirable anyway before that is fixed; for now the testcase just uses -fvect-cost-model=unlimited.

2024-12-13  Jakub Jelinek  <jakub@redhat.com>

PR target/116979
* config/i386/mmx.md (vec_fmaddsubv2sf4, vec_fmsubaddv2sf4): New define_expand patterns.
* gcc.target/i386/pr116979.c: New test.
author: Jakub Jelinek <jakub@redhat.com> 2024-12-13 10:31:04 +0100
committer: Jakub Jelinek <jakub@gcc.gnu.org> 2024-12-13 10:32:57 +0100
commit: 99b9dfaff66ca6edd534bcf0e7b943a6f816c9bf (patch)
tree: 9831e90de5a29627b9a38c3f1447cac492dcd819 /gcc/config/i386
parent: 12a5ab146110631edffcd307a0c10773160f2723 (diff)
download: gcc-99b9dfaff66ca6edd534bcf0e7b943a6f816c9bf.zip
gcc-99b9dfaff66ca6edd534bcf0e7b943a6f816c9bf.tar.gz
gcc-99b9dfaff66ca6edd534bcf0e7b943a6f816c9bf.tar.bz2
1 files changed, 48 insertions, 0 deletions
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 4daaa2ba..d9725a4 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1132,6 +1132,54 @@
   DONE;
 })
 
+(define_expand "vec_fmaddsubv2sf4"  ;; V2SF alternating add/sub FMA.
+  [(match_operand:V2SF 0 "register_operand")  ;; Result.
+   (match_operand:V2SF 1 "nonimmediate_operand")  ;; Operands 1-3: inputs.
+   (match_operand:V2SF 2 "nonimmediate_operand")
+   (match_operand:V2SF 3 "nonimmediate_operand")]
+  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
+   && TARGET_MMX_WITH_SSE
+   && ix86_partial_vec_fp_math"
+{
+  rtx op3 = gen_reg_rtx (V4SFmode);  /* New V4SF registers.  */
+  rtx op2 = gen_reg_rtx (V4SFmode);
+  rtx op1 = gen_reg_rtx (V4SFmode);
+  rtx op0 = gen_reg_rtx (V4SFmode);
+  /* Move each V2SF input into its V4SF register.  */
+  emit_insn (gen_movq_v2sf_to_sse (op3, operands[3]));
+  emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
+  emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
+  /* Let the V4SF expander emit the actual operation.  */
+  emit_insn (gen_vec_fmaddsubv4sf4 (op0, op1, op2, op3));
+  /* Only the low V2SF part of the V4SF value is the result.  */
+  emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
+  DONE;
+})
+
+(define_expand "vec_fmsubaddv2sf4"  ;; V2SF alternating sub/add FMA.
+  [(match_operand:V2SF 0 "register_operand")  ;; Result.
+   (match_operand:V2SF 1 "nonimmediate_operand")  ;; Operands 1-3: inputs.
+   (match_operand:V2SF 2 "nonimmediate_operand")
+   (match_operand:V2SF 3 "nonimmediate_operand")]
+  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
+   && TARGET_MMX_WITH_SSE
+   && ix86_partial_vec_fp_math"
+{
+  rtx op3 = gen_reg_rtx (V4SFmode);  /* New V4SF registers.  */
+  rtx op2 = gen_reg_rtx (V4SFmode);
+  rtx op1 = gen_reg_rtx (V4SFmode);
+  rtx op0 = gen_reg_rtx (V4SFmode);
+  /* Move each V2SF input into its V4SF register.  */
+  emit_insn (gen_movq_v2sf_to_sse (op3, operands[3]));
+  emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
+  emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
+  /* Let the V4SF expander emit the actual operation.  */
+  emit_insn (gen_vec_fmsubaddv4sf4 (op0, op1, op2, op3));
+  /* Only the low V2SF part of the V4SF value is the result.  */
+  emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
+  DONE;
+})
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel single-precision floating point comparisons
author	Jakub Jelinek <jakub@redhat.com>	2024-12-13 10:31:04 +0100
committer	Jakub Jelinek <jakub@gcc.gnu.org>	2024-12-13 10:32:57 +0100
commit	99b9dfaff66ca6edd534bcf0e7b943a6f816c9bf (patch)
tree	9831e90de5a29627b9a38c3f1447cac492dcd819 /gcc/config/i386
parent	12a5ab146110631edffcd307a0c10773160f2723 (diff)
download	gcc-99b9dfaff66ca6edd534bcf0e7b943a6f816c9bf.zip gcc-99b9dfaff66ca6edd534bcf0e7b943a6f816c9bf.tar.gz gcc-99b9dfaff66ca6edd534bcf0e7b943a6f816c9bf.tar.bz2