diff options
author | liuhongt <hongtao.liu@intel.com> | 2021-10-09 09:42:10 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2021-11-19 09:22:48 +0800 |
commit | d3152981f71eef16e50246a94819c39ff1489c70 (patch) | |
tree | 3d734bec02b4e5230bb92e01fb8b173a30bc848c | |
parent | 09c24fe42ff2cef3f3291f5a7540a5835c08430c (diff) | |
download | gcc-d3152981f71eef16e50246a94819c39ff1489c70.zip gcc-d3152981f71eef16e50246a94819c39ff1489c70.tar.gz gcc-d3152981f71eef16e50246a94819c39ff1489c70.tar.bz2 |
Reduce cost of aligned SSE register store.
Make them equal to the cost of unaligned ones to avoid odd alignment
peeling.
Impact for SPEC2017 on CLX:
fprate:
503.bwaves_r BuildSame
507.cactuBSSN_r -0.22
508.namd_r -0.02
510.parest_r -0.28
511.povray_r -0.20
519.lbm_r BuildSame
521.wrf_r -0.58
526.blender_r -0.30
527.cam4_r 1.07
538.imagick_r 0.01
544.nab_r -0.09
549.fotonik3d_r BuildSame
554.roms_r BuildSame
intrate:
500.perlbench_r -0.25
502.gcc_r -0.15
505.mcf_r BuildSame
520.omnetpp_r 1.03
523.xalancbmk_r -0.13
525.x264_r -0.05
531.deepsjeng_r -0.27
541.leela_r -0.24
548.exchange2_r -0.06
557.xz_r -0.10
999.specrand_ir 2.69
gcc/ChangeLog:
PR target/102543
* config/i386/x86-tune-costs.h (skylake_cost): Reduce cost of
storing 256/512-bit SSE register to be equal to cost of
unaligned store to avoid odd alignment peeling.
(icelake_cost): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr102543.c: New test.
-rw-r--r-- | gcc/config/i386/x86-tune-costs.h | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr102543.c | 35 |
2 files changed, 37 insertions, 2 deletions
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index dd5563d..60d50c9 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1903,7 +1903,7 @@ struct processor_costs skylake_cost = {
   {6, 6, 6}, /* cost of storing integer registers */
   {6, 6, 6, 10, 20}, /* cost of loading SSE register
      in 32bit, 64bit, 128bit, 256bit and 512bit */
-  {8, 8, 8, 12, 24}, /* cost of storing SSE register
+  {8, 8, 8, 8, 16}, /* cost of storing SSE register
      in 32bit, 64bit, 128bit, 256bit and 512bit */
   {6, 6, 6, 10, 20}, /* cost of unaligned loads. */
   {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
@@ -2029,7 +2029,7 @@ struct processor_costs icelake_cost = {
   {6, 6, 6}, /* cost of storing integer registers */
   {6, 6, 6, 10, 20}, /* cost of loading SSE register
      in 32bit, 64bit, 128bit, 256bit and 512bit */
-  {8, 8, 8, 12, 24}, /* cost of storing SSE register
+  {8, 8, 8, 8, 16}, /* cost of storing SSE register
      in 32bit, 64bit, 128bit, 256bit and 512bit */
   {6, 6, 6, 10, 20}, /* cost of unaligned loads. */
   {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
diff --git a/gcc/testsuite/gcc.target/i386/pr102543.c b/gcc/testsuite/gcc.target/i386/pr102543.c
new file mode 100644
index 0000000..893eb9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102543.c
@@ -0,0 +1,35 @@
+/* PR target/102543 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=skylake-avx512 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "MEM\\\[" "optimized" } } */
+
+struct a
+{
+  int a[100];
+};
+typedef struct a misaligned_t __attribute__ ((aligned (8)));
+typedef struct a aligned_t __attribute__ ((aligned (32)));
+
+__attribute__ ((used))
+__attribute__ ((noinline))
+void
+t(void *a, int misaligned, aligned_t *d)
+{
+  int i,v;
+  for (i=0;i<100;i++)
+    {
+      if (misaligned)
+        v=((misaligned_t *)a)->a[i];
+      else
+        v=((aligned_t *)a)->a[i];
+      d->a[i]+=v;
+    }
+}
+struct b {int v; misaligned_t m;aligned_t aa;} b;
+aligned_t d;
+int
+main()
+{
+  t(&b.m, 1, &d);
+  return 0;
+}