aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHongyu Wang <hongyu.wang@intel.com>2023-06-30 09:44:56 +0800
committerHongyu Wang <hongyu.wang@intel.com>2023-07-06 14:41:22 +0800
commite24b8256fe37df6ad39e2634c66ade70ca7c4e83 (patch)
tree73a338662f6adf611e715a121c2459eccdc7e0d8
parent0d40aeb9b85db87e4a502a530db5b89027d1a3b6 (diff)
downloadgcc-e24b8256fe37df6ad39e2634c66ade70ca7c4e83.zip
gcc-e24b8256fe37df6ad39e2634c66ade70ca7c4e83.tar.gz
gcc-e24b8256fe37df6ad39e2634c66ade70ca7c4e83.tar.bz2
i386: Inline function with default arch/tune to caller
For functions with different target attributes, the current logic refuses to inline the callee when either arch or tune is mismatched. Relax the condition to allow a callee with default arch/tune to be inlined. gcc/ChangeLog: * config/i386/i386.cc (ix86_can_inline_p): If callee has default arch=x86-64 and tune=generic, do not block the inlining to its caller. Also allow callee with different arch= to be inlined if it has always_inline attribute and its ISA is a subset of caller's. gcc/testsuite/ChangeLog: * gcc.target/i386/inline_attr_arch.c: New test. * gcc.target/i386/inline_target_clones.c: Ditto.
-rw-r--r--gcc/config/i386/i386.cc24
-rw-r--r--gcc/testsuite/gcc.target/i386/inline_attr_arch.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/inline_target_clones.c24
3 files changed, 66 insertions, 7 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 0cc4b32..f0d6167 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -605,13 +605,6 @@ ix86_can_inline_p (tree caller, tree callee)
!= (callee_opts->x_target_flags & ~always_inline_safe_mask))
ret = false;
- /* See if arch, tune, etc. are the same. */
- else if (caller_opts->arch != callee_opts->arch)
- ret = false;
-
- else if (!always_inline && caller_opts->tune != callee_opts->tune)
- ret = false;
-
else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
/* If the calle doesn't use FP expressions differences in
ix86_fpmath can be ignored. We are called from FEs
@@ -622,6 +615,23 @@ ix86_can_inline_p (tree caller, tree callee)
|| ipa_fn_summaries->get (callee_node)->fp_expressions))
ret = false;
+ /* At this point we cannot identify whether arch or tune setting
+ comes from target attribute or not. So the most conservative way
+ is to allow the callee that uses default arch and tune string to
+ be inlined. */
+ else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
+ && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
+ ret = true;
+
+ /* See if arch, tune, etc. are the same. As the previous ISA flags check
+ already verifies that callee's ISA is a subset of caller's, do not block
+ an always_inline callee even if it has a different arch. */
+ else if (!always_inline && caller_opts->arch != callee_opts->arch)
+ ret = false;
+
+ else if (!always_inline && caller_opts->tune != callee_opts->tune)
+ ret = false;
+
else if (!always_inline
&& caller_opts->branch_cost != callee_opts->branch_cost)
ret = false;
diff --git a/gcc/testsuite/gcc.target/i386/inline_attr_arch.c b/gcc/testsuite/gcc.target/i386/inline_attr_arch.c
new file mode 100644
index 0000000..1fab485
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/inline_attr_arch.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O3 -march=x86-64" } */
+/* { dg-final { scan-assembler-not "call\[ \t\]+callee" } } */
+
+__attribute__((always_inline,target("arch=haswell")))
+inline float callee (float a, float b, float c, float d,
+ float e, float f, float g, float h)
+{
+ return a * b + c * d + e * f + g + h + a * c + b * c
+ + a * d + b * e + a * f + c * h +
+ b * (a - 0.4f) * (c + h) * (b + e * d) - a / f * h;
+}
+
+__attribute__((target("arch=icelake-server")))
+void caller (int n, float *a,
+ float c1, float c2, float c3,
+ float c4, float c5, float c6,
+ float c7)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = callee (a[i], c1, c2, c3, c4, c5, c6, c7);
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/inline_target_clones.c b/gcc/testsuite/gcc.target/i386/inline_target_clones.c
new file mode 100644
index 0000000..53db160
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/inline_target_clones.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O3 -march=x86-64" } */
+/* { dg-final { scan-assembler-not "call\[ \t\]+callee" } } */
+
+float callee (float a, float b, float c, float d,
+ float e, float f, float g, float h)
+{
+ return a * b + c * d + e * f + g + h + a * c + b * c
+ + a * d + b * e + a * f + c * h +
+ b * (a - 0.4f) * (c + h) * (b + e * d) - a / f * h;
+}
+
+__attribute__((target_clones("default","arch=icelake-server")))
+void caller (int n, float *a,
+ float c1, float c2, float c3,
+ float c4, float c5, float c6,
+ float c7)
+{
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = callee (a[i], c1, c2, c3, c4, c5, c6, c7);
+ }
+}