author     Tamar Christina <tamar.christina@arm.com>  2023-03-12 18:40:50 +0000
committer  Tamar Christina <tamar.christina@arm.com>  2023-03-12 18:40:50 +0000
commit     0b3c630fcc44063a61f6131af48a4171b1de2b37 (patch)
tree       b3033ac2f983291900c22964bfc11f10d12635ac /gcc
parent     03c6ba86757f0684c5419c90651106900f5ecb5a (diff)
middle-end: don't form FMAs when multiplication is not single use. [PR108583]
The testcase

  typedef unsigned int vec __attribute__((vector_size(32)));
  vec
  f3 (vec a, vec b, vec c)
  {
    vec d = a * b;
    return d + ((c + d) >> 1);
  }

shows a case where we don't want to form an FMA because the MUL is not
single use.  Forming an FMA here would force us to redo the MUL, since we
no longer have its result to share.  As such, making an FMA here would be
a de-optimization.

gcc/ChangeLog:

	PR target/108583
	* tree-ssa-math-opts.cc (convert_mult_to_fma): Inhibit FMA in case
	not single use.

gcc/testsuite/ChangeLog:

	PR target/108583
	* gcc.dg/mla_1.c: New test.

Co-Authored-By: Richard Sandiford <richard.sandiford@arm.com>
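To illustrate the problem (a rough GIMPLE-level sketch, not taken from an
actual dump): with the multiply feeding two additions,

  d  = a * b;
  t1 = c + d;
  t2 = t1 >> 1;
  r  = d + t2;

forming an FMA for each use of d would duplicate the multiply:

  t1 = .FMA (a, b, c);     /* c + a * b */
  t2 = t1 >> 1;
  r  = .FMA (a, b, t2);    /* t2 + a * b -- the multiply is redone */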
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/testsuite/gcc.dg/mla_1.c  40
-rw-r--r--  gcc/tree-ssa-math-opts.cc     14
2 files changed, 54 insertions(+), 0 deletions(-)
diff --git a/gcc/testsuite/gcc.dg/mla_1.c b/gcc/testsuite/gcc.dg/mla_1.c
new file mode 100644
index 0000000..98e5808
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/mla_1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O2 -msve-vector-bits=256 -march=armv8.2-a+sve -fdump-tree-optimized" { target aarch64*-*-* } } */
+
+unsigned int
+f1 (unsigned int a, unsigned int b, unsigned int c) {
+ unsigned int d = a * b;
+ return d + ((c + d) >> 1);
+}
+
+unsigned int
+g1 (unsigned int a, unsigned int b, unsigned int c) {
+ return a * b + c;
+}
+
+__Uint32x4_t
+f2 (__Uint32x4_t a, __Uint32x4_t b, __Uint32x4_t c) {
+ __Uint32x4_t d = a * b;
+ return d + ((c + d) >> 1);
+}
+
+__Uint32x4_t
+g2 (__Uint32x4_t a, __Uint32x4_t b, __Uint32x4_t c) {
+ return a * b + c;
+}
+
+typedef unsigned int vec __attribute__((vector_size(32)));
+vec
+f3 (vec a, vec b, vec c)
+{
+ vec d = a * b;
+ return d + ((c + d) >> 1);
+}
+
+vec
+g3 (vec a, vec b, vec c)
+{
+ return a * b + c;
+}
+
+/* { dg-final { scan-tree-dump-times {\.FMA } 1 "optimized" { target aarch64*-*-* } } } */
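The dg-final directive above expects exactly one fused multiply-add in the
"optimized" tree dump.  Illustratively (hypothetical SSA names, not an
actual dump), a chain such as a * b + c that does get fused shows up as:

  _5 = .FMA (a_1(D), b_2(D), c_3(D));

while the f1/f2/f3 chains, whose multiply result is used twice, keep the
plain multiply and additions.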
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 5ab5b94..26ed91d 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -3346,6 +3346,20 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
param_avoid_fma_max_bits));
bool defer = check_defer;
bool seen_negate_p = false;
+
+ /* There is no numerical difference between fused and unfused integer FMAs,
+ and the assumption below that FMA is as cheap as addition is unlikely
+ to be true, especially if the multiplication occurs multiple times on
+ the same chain. E.g., for something like:
+
+ (((a * b) + c) >> 1) + (a * b)
+
+ we do not want to duplicate the a * b into two additions, not least
+ because the result is not a natural FMA chain. */
+ if (ANY_INTEGRAL_TYPE_P (type)
+ && !has_single_use (mul_result))
+ return false;
+
/* Make sure that the multiplication statement becomes dead after
the transformation, thus that all uses are transformed to FMAs.
This means we assume that an FMA operation has the same cost
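As a worked example of the new guard (hypothetical SSA names; the real
pass inspects GIMPLE statements):

  d_4 = a_1(D) * b_2(D);    /* mul_result is d_4 */
  t_5 = c_3(D) + d_4;       /* first use of d_4 */
  t_6 = t_5 >> 1;
  r_7 = d_4 + t_6;          /* second use of d_4 */

Here has_single_use (d_4) is false and the type is integral, so
convert_mult_to_fma now bails out before forming any FMA, and the shared
multiply is kept.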