about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2022-11-10 11:58:45 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2022-11-10 12:42:51 +0000
commit: 30498cf7c46f90db0f67ff01f0246860e55be0f2 (patch)
tree: 4fc9d14da7644654ff490a2e6e6c17b6ad08b8a4
parent: 956489700e73e57b569337a9689d69b2e00dba26 (diff)
download: llvm-30498cf7c46f90db0f67ff01f0246860e55be0f2.zip
llvm-30498cf7c46f90db0f67ff01f0246860e55be0f2.tar.gz
llvm-30498cf7c46f90db0f67ff01f0246860e55be0f2.tar.bz2
[X86] SkylakeClientModel - conversion instructions don't use Port015
Fixes a lot of throughput mismatches: the more complicated conversion instructions use SKLPort5+SKLPort01, not SKLPort5+SKLPort015 (SKLPort015 is mainly used for basic logic + blend ops).

Fixing this should allow us to remove a lot of unnecessary scheduler overrides from SkylakeClientModel.

Confirmed by both Agner + uops.info.
-rw-r--r-- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 6
-rw-r--r-- llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s | 16
-rw-r--r-- llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s | 6
-rw-r--r-- llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s | 8
-rw-r--r-- llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s | 18
5 files changed, 27 insertions, 27 deletions
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index efd9fb5..42b81eb 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -469,14 +469,14 @@ defm : SKLWriteResPair<WriteCvtPD2PS, [SKLPort5,SKLPort01], 5, [1,1], 2, 6>;
defm : SKLWriteResPair<WriteCvtPD2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2, 6>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
-defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteRes<WriteCvtPH2PSLd, [SKLPort23,SKLPort01], 9, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
-defm : X86WriteRes<WriteCvtPS2PH, [SKLPort5,SKLPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PH, [SKLPort5,SKLPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteRes<WriteCvtPS2PHSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 6, [1,1,1,1], 4>;
@@ -928,7 +928,7 @@ def SKLWriteResGroup57 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
-def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> {
+def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let Latency = 5;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
index f28cd83..4f7227b 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 126.00 333.42 202.42 173.17 173.17 34.00 324.92 5.25 12.67
+# CHECK-NEXT: - 126.00 334.58 203.58 173.17 173.17 34.00 322.58 5.25 12.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1832,7 +1832,7 @@ vzeroupper
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2ps (%rax), %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2
@@ -1844,7 +1844,7 @@ vzeroupper
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2dq (%rax), %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2pd %xmm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2pd (%rax), %ymm2
@@ -1854,21 +1854,21 @@ vzeroupper
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtsd2si (%rax), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsd2ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtsd2ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %ecx, %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %rcx, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2sd %ecx, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2sd %rcx, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdl (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdq (%rax), %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2ss %ecx, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2ss %ecx, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vcvtsi2ss %rcx, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssl (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssq (%rax), %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtss2sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtss2sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtss2sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvtss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - vcvtss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %ecx
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %rcx
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s
index 648c7b8..03325f2 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s
@@ -45,15 +45,15 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 3.67 3.67 1.67 1.67 2.00 6.67 - 0.67
+# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtph2ps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2ph $0, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %ymm0, (%rax)
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
index 1c77b29..007eb96 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
@@ -333,7 +333,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 24.00 71.33 24.33 32.00 32.00 8.00 32.83 0.50 3.00
+# CHECK-NEXT: - 24.00 71.83 24.83 32.00 32.00 8.00 31.83 0.50 3.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -353,9 +353,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comiss (%rax), %xmm1
# CHECK-NEXT: - - 2.00 - - - - - - - cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvtpi2ps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pi %xmm0, %mm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtps2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2pi (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2ss %ecx, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2ss %ecx, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - cvtsi2ss %rcx, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssq (%rax), %xmm2
@@ -363,7 +363,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvtss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %ecx
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %rcx
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttps2pi %xmm0, %mm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttps2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvttps2pi (%rax), %mm2
# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvttss2si %xmm0, %rcx
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
index 082346c5..31d260b 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
@@ -689,7 +689,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 40.00 111.25 79.25 63.50 63.50 14.00 96.25 2.25 5.00
+# CHECK-NEXT: - 40.00 112.58 80.58 63.50 63.50 14.00 93.58 2.25 5.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -712,9 +712,9 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtdq2ps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2dq (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2pi %xmm0, %mm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2pi (%rax), %mm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2
@@ -722,7 +722,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2dq (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2pd (%rax), %xmm2
# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtsd2si %xmm0, %rcx
@@ -730,15 +730,15 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtsd2si (%rax), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsd2ss %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtsd2ss (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %ecx, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %rcx, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2sd %ecx, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2sd %rcx, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2sdq (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtss2sd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtss2sd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtss2sd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttpd2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvttpd2dq (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttpd2pi %xmm0, %mm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttpd2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvttpd2pi (%rax), %mm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvttps2dq (%rax), %xmm2