From 657612fb9f58c9cca44b091e3cf40d704fe3ec75 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Thu, 19 May 2022 15:32:22 +0800 Subject: Increase move cost between mask and gpr. kmovd only uses port5 which is often the bottleneck of performance. Also from latency perspective, spill and reload mostly could be STLF or even MRN which only take 1 cycle. So the patch increase move cost between gpr and mask to be the same as gpr <-> sse register. gcc/ChangeLog: * config/i386/x86-tune-costs.h (skylake_cost): Increase gpr <-> mask cost from 5 to 6. (icelake_cost): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/spill_to_mask-1.c: New test. --- gcc/config/i386/x86-tune-costs.h | 4 ++-- gcc/testsuite/gcc.target/i386/spill_to_mask-1.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 017ffa6..05cbd49 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -1866,7 +1866,7 @@ struct processor_costs skylake_cost = { {8, 8, 8, 12, 24}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 6, 6, /* SSE->integer and integer->SSE moves */ - 5, 5, /* mask->integer and integer->mask moves */ + 6, 6, /* mask->integer and integer->mask moves */ {8, 8, 8}, /* cost of loading mask register in QImode, HImode, SImode. */ {6, 6, 6}, /* cost if storing mask register @@ -1992,7 +1992,7 @@ struct processor_costs icelake_cost = { {8, 8, 8, 12, 24}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 6, 6, /* SSE->integer and integer->SSE moves */ - 5, 5, /* mask->integer and integer->mask moves */ + 6, 6, /* mask->integer and integer->mask moves */ {8, 8, 8}, /* cost of loading mask register in QImode, HImode, SImode. */ {6, 6, 6}, /* cost if storing mask register diff --git a/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c b/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c index 94d6764..be19239 100644 --- a/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c +++ b/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c @@ -120,7 +120,7 @@ void foo (DTYPE in[16], DTYPE out[8], const DTYPE C[16]) out[7] += h; } -/* { dg-final { scan-assembler "kmovd" } } */ +/* { dg-final { scan-assembler "kmovd" { xfail *-*-* } } } */ /* { dg-final { scan-assembler-not "knot" } } */ /* { dg-final { scan-assembler-not "kxor" } } */ /* { dg-final { scan-assembler-not "kor" } } */ -- cgit v1.1