Reduce cost of aligned sse register store.

Make them be equal to cost of unaligned ones to avoid odd alignment
peeling.

Impact for SPEC2017 on CLX:

fprate:
  503.bwaves_r      BuildSame
  507.cactuBSSN_r   -0.22
  508.namd_r        -0.02
  510.parest_r      -0.28
  511.povray_r      -0.20
  519.lbm_r         BuildSame
  521.wrf_r         -0.58
  526.blender_r     -0.30
  527.cam4_r        1.07
  538.imagick_r     0.01
  544.nab_r         -0.09
  549.fotonik3d_r   BuildSame
  554.roms_r        BuildSame

intrate:
  500.perlbench_r   -0.25
  502.gcc_r         -0.15
  505.mcf_r         BuildSame
  520.omnetpp_r     1.03
  523.xalancbmk_r   -0.13
  525.x264_r        -0.05
  531.deepsjeng_r   -0.27
  541.leela_r       -0.24
  548.exchange2_r   -0.06
  557.xz_r          -0.10
  999.specrand_ir   2.69

gcc/ChangeLog:

	PR target/102543
	* config/i386/x86-tune-costs.h (skylake_cost): Reduce cost of
	storing 256/512-bit SSE register to be equal to cost of
	unaligned store to avoid odd alignment peeling.
	(icelake_cost): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr102543.c: New test.
author: liuhongt <hongtao.liu@intel.com> 2021-10-09 09:42:10 +0800
committer: liuhongt <hongtao.liu@intel.com> 2021-11-19 09:22:48 +0800
commit: d3152981f71eef16e50246a94819c39ff1489c70 (patch)
tree: 3d734bec02b4e5230bb92e01fb8b173a30bc848c
parent: 09c24fe42ff2cef3f3291f5a7540a5835c08430c (diff)
download: gcc-d3152981f71eef16e50246a94819c39ff1489c70.zip
gcc-d3152981f71eef16e50246a94819c39ff1489c70.tar.gz
gcc-d3152981f71eef16e50246a94819c39ff1489c70.tar.bz2
2 files changed, 37 insertions, 2 deletions
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index dd5563d..60d50c9 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1903,7 +1903,7 @@ struct processor_costs skylake_cost = {
   {6, 6, 6},				/* cost of storing integer registers */
   {6, 6, 6, 10, 20},			/* cost of loading SSE register
 					   in 32bit, 64bit, 128bit, 256bit and 512bit */
-  {8, 8, 8, 12, 24},			/* cost of storing SSE register
+  {8, 8, 8, 8, 16},			/* cost of storing SSE register
 					   in 32bit, 64bit, 128bit, 256bit and 512bit */
   {6, 6, 6, 10, 20},			/* cost of unaligned loads.  */
   {8, 8, 8, 8, 16},			/* cost of unaligned stores.  */
@@ -2029,7 +2029,7 @@ struct processor_costs icelake_cost = {
   {6, 6, 6},				/* cost of storing integer registers */
   {6, 6, 6, 10, 20},			/* cost of loading SSE register
 					   in 32bit, 64bit, 128bit, 256bit and 512bit */
-  {8, 8, 8, 12, 24},			/* cost of storing SSE register
+  {8, 8, 8, 8, 16},			/* cost of storing SSE register
 					   in 32bit, 64bit, 128bit, 256bit and 512bit */
   {6, 6, 6, 10, 20},			/* cost of unaligned loads.  */
   {8, 8, 8, 8, 16},			/* cost of unaligned stores.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr102543.c b/gcc/testsuite/gcc.target/i386/pr102543.c
new file mode 100644
index 0000000..893eb9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102543.c
@@ -0,0 +1,35 @@
+/* PR target/102543 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=skylake-avx512 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "MEM\\\[" "optimized" } } */
+
+struct a
+{
+  int a[100];
+};
+typedef struct a misaligned_t __attribute__ ((aligned (8)));
+typedef struct a aligned_t __attribute__ ((aligned (32)));
+
+__attribute__ ((used))
+__attribute__ ((noinline))
+void
+t(void *a, int misaligned, aligned_t *d)
+{
+  int i,v;
+  for (i=0;i<100;i++)
+    {
+      if (misaligned)
+	v=((misaligned_t *)a)->a[i];
+      else
+	v=((aligned_t *)a)->a[i];
+      d->a[i]+=v;
+    }
+}
+struct b {int v; misaligned_t m;aligned_t aa;} b;
+aligned_t d;
+int
+main()
+{
+  t(&b.m, 1, &d);
+  return 0;
+}
author	liuhongt <hongtao.liu@intel.com>	2021-10-09 09:42:10 +0800
committer	liuhongt <hongtao.liu@intel.com>	2021-11-19 09:22:48 +0800
commit	d3152981f71eef16e50246a94819c39ff1489c70 (patch)
tree	3d734bec02b4e5230bb92e01fb8b173a30bc848c
parent	09c24fe42ff2cef3f3291f5a7540a5835c08430c (diff)
download	gcc-d3152981f71eef16e50246a94819c39ff1489c70.zip gcc-d3152981f71eef16e50246a94819c39ff1489c70.tar.gz gcc-d3152981f71eef16e50246a94819c39ff1489c70.tar.bz2