diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-11-27 11:21:00 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-11-27 11:21:07 +0000 |
commit | 37aebcf4e60e5c913e3d99675548b3e2c631398b (patch) | |
tree | 95f04e312f80a72ce7861b47885bfe629feae133 | |
parent | 124b1f8d85af71e512e6dc6250c8bfa370a33d48 (diff) | |
download | llvm-37aebcf4e60e5c913e3d99675548b3e2c631398b.zip llvm-37aebcf4e60e5c913e3d99675548b3e2c631398b.tar.gz llvm-37aebcf4e60e5c913e3d99675548b3e2c631398b.tar.bz2 |
[X86] Cleanup SFENCE/MFENCE schedules
Remove unnecessary overrides.
UOp + Port usage confirmed by Agner/uops.info
23 files changed, 46 insertions, 104 deletions
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 595dfbf..42be811 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -615,7 +615,7 @@ def : WriteRes<WriteSystem, [BWPort0156]> { let Latency = 100; } def : WriteRes<WriteMicrocoded, [BWPort0156]> { let Latency = 100; } // Fence instructions. -def : WriteRes<WriteFence, [BWPort23, BWPort4]>; +def : WriteRes<WriteFence, [BWPort23, BWPort4]> { let NumMicroOps = 2; let ReleaseAtCycles = [1,1]; } // Nop, not very useful expect it provides a model for nops! def : WriteRes<WriteNop, []>; @@ -722,7 +722,6 @@ def BWWriteResGroup14 : SchedWriteRes<[BWPort0156]> { let ReleaseAtCycles = [2]; } def: InstRW<[BWWriteResGroup14], (instrs LFENCE, - MFENCE, WAIT, XGETBV)>; @@ -740,13 +739,6 @@ def BWWriteResGroup17 : SchedWriteRes<[BWPort01,BWPort015]> { } def: InstRW<[BWWriteResGroup17], (instrs MMX_MOVDQ2Qrr)>; -def BWWriteResGroup18 : SchedWriteRes<[BWPort237,BWPort0156]> { - let Latency = 2; - let NumMicroOps = 2; - let ReleaseAtCycles = [1,1]; -} -def: InstRW<[BWWriteResGroup18], (instrs SFENCE)>; - def BWWriteResGroup20 : SchedWriteRes<[BWPort06,BWPort0156]> { let Latency = 2; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index e4faa83..8aa9104 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -627,7 +627,7 @@ def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; } def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; } // Fence instructions. -def : WriteRes<WriteFence, [HWPort23, HWPort4]>; +def : WriteRes<WriteFence, [HWPort23, HWPort4]> { let NumMicroOps = 2; let ReleaseAtCycles = [1,1]; } // Nop, not very useful expect it provides a model for nops! 
def : WriteRes<WriteNop, []>; @@ -1054,13 +1054,6 @@ def HWWriteResGroup18 : SchedWriteRes<[HWPort23,HWPort0156]> { def: InstRW<[HWWriteResGroup18], (instrs POP16r, POP32r, POP64r)>; def: InstRW<[HWWriteResGroup18], (instregex "POP(16|32|64)rmr")>; -def HWWriteResGroup19 : SchedWriteRes<[HWPort237,HWPort0156]> { - let Latency = 2; - let NumMicroOps = 2; - let ReleaseAtCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup19], (instrs SFENCE)>; - def HWWriteResGroup21 : SchedWriteRes<[HWPort4,HWPort6,HWPort237]> { let Latency = 2; let NumMicroOps = 3; @@ -1121,7 +1114,6 @@ def HWWriteResGroup30 : SchedWriteRes<[HWPort0156]> { let ReleaseAtCycles = [2]; } def: InstRW<[HWWriteResGroup30], (instrs LFENCE, - MFENCE, WAIT, XGETBV)>; diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td index 9d6368f..60e14ae 100644 --- a/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/llvm/lib/Target/X86/X86SchedIceLake.td @@ -607,7 +607,7 @@ defm : ICXWriteResPair<WriteVarShuffle256, [ICXPort5], 3, [1], 1, 7>; // 256-bi def : WriteRes<WriteMicrocoded, [ICXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite; // Fence instructions. -def : WriteRes<WriteFence, [ICXPort78, ICXPort49]>; +def : WriteRes<WriteFence, [ICXPort78, ICXPort49]> { let NumMicroOps = 2; let ReleaseAtCycles = [1,1]; } // Load/store MXCSR. 
def : WriteRes<WriteLDMXCSR, [ICXPort0,ICXPort23,ICXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; } @@ -771,13 +771,6 @@ def ICXWriteResGroup20 : SchedWriteRes<[ICXPort6,ICXPort0156]> { } def: InstRW<[ICXWriteResGroup20], (instregex "CLFLUSH")>; -def ICXWriteResGroup21 : SchedWriteRes<[ICXPort49,ICXPort78]> { - let Latency = 2; - let NumMicroOps = 2; - let ReleaseAtCycles = [1,1]; -} -def: InstRW<[ICXWriteResGroup21], (instrs SFENCE)>; - def ICXWriteResGroup23 : SchedWriteRes<[ICXPort06,ICXPort0156]> { let Latency = 2; let NumMicroOps = 2; @@ -916,13 +909,6 @@ def ICXWriteResGroup42 : SchedWriteRes<[ICXPort6,ICXPort0156]> { } def: InstRW<[ICXWriteResGroup42], (instregex "CLD")>; -def ICXWriteResGroup43 : SchedWriteRes<[ICXPort49,ICXPort78]> { - let Latency = 3; - let NumMicroOps = 3; - let ReleaseAtCycles = [1,2]; -} -def: InstRW<[ICXWriteResGroup43], (instrs MFENCE)>; - def ICXWriteResGroup44 : SchedWriteRes<[ICXPort06,ICXPort0156]> { let Latency = 2; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 775ad6b..560a2b1 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -588,7 +588,7 @@ def : WriteRes<WriteSTMXCSR, [SBPort0,SBPort4,SBPort5,SBPort23]> { let Latency = def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; } def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; } -def : WriteRes<WriteFence, [SBPort23, SBPort4]>; +def : WriteRes<WriteFence, [SBPort23, SBPort4]> { let NumMicroOps = 2; let ReleaseAtCycles = [1,1]; } def : WriteRes<WriteNop, []>; // AVX2/FMA is not supported on that architecture, but we should define the basic diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index e4e833c..8cd52e2 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -608,7 
+608,7 @@ defm : SKLWriteResPair<WriteVarShuffle256, [SKLPort5], 3, [1], 1, 7>; // 256-bi def : WriteRes<WriteMicrocoded, [SKLPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite; // Fence instructions. -def : WriteRes<WriteFence, [SKLPort23, SKLPort4]>; +def : WriteRes<WriteFence, [SKLPort23, SKLPort4]> { let NumMicroOps = 2; let ReleaseAtCycles = [1,1]; } // Load/store MXCSR. def : WriteRes<WriteLDMXCSR, [SKLPort0,SKLPort23,SKLPort0156]> { let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; } @@ -738,13 +738,6 @@ def SKLWriteResGroup20 : SchedWriteRes<[SKLPort6,SKLPort0156]> { } def: InstRW<[SKLWriteResGroup20], (instregex "CLFLUSH")>; -def SKLWriteResGroup21 : SchedWriteRes<[SKLPort237,SKLPort0156]> { - let Latency = 2; - let NumMicroOps = 2; - let ReleaseAtCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup21], (instrs SFENCE)>; - def SKLWriteResGroup23 : SchedWriteRes<[SKLPort06,SKLPort0156]> { let Latency = 2; let NumMicroOps = 2; @@ -834,13 +827,6 @@ def SKLWriteResGroup40 : SchedWriteRes<[SKLPort6,SKLPort0156]> { } def: InstRW<[SKLWriteResGroup40], (instregex "CLD")>; -def SKLWriteResGroup41 : SchedWriteRes<[SKLPort237,SKLPort0156]> { - let Latency = 3; - let NumMicroOps = 3; - let ReleaseAtCycles = [1,2]; -} -def: InstRW<[SKLWriteResGroup41], (instrs MFENCE)>; - def SKLWriteResGroup42 : SchedWriteRes<[SKLPort06,SKLPort0156]> { let Latency = 2; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 62cc4a9..14a51d1e 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -602,7 +602,7 @@ defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bi def : WriteRes<WriteMicrocoded, [SKXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite; // Fence instructions. 
-def : WriteRes<WriteFence, [SKXPort23, SKXPort4]>; +def : WriteRes<WriteFence, [SKXPort23, SKXPort4]> { let NumMicroOps = 2; let ReleaseAtCycles = [1,1]; } // Load/store MXCSR. def : WriteRes<WriteLDMXCSR, [SKXPort0,SKXPort23,SKXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; } @@ -757,13 +757,6 @@ def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> { } def: InstRW<[SKXWriteResGroup20], (instregex "CLFLUSH")>; -def SKXWriteResGroup21 : SchedWriteRes<[SKXPort237,SKXPort0156]> { - let Latency = 2; - let NumMicroOps = 2; - let ReleaseAtCycles = [1,1]; -} -def: InstRW<[SKXWriteResGroup21], (instrs SFENCE)>; - def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> { let Latency = 2; let NumMicroOps = 2; @@ -894,13 +887,6 @@ def SKXWriteResGroup42 : SchedWriteRes<[SKXPort6,SKXPort0156]> { } def: InstRW<[SKXWriteResGroup42], (instregex "CLD")>; -def SKXWriteResGroup43 : SchedWriteRes<[SKXPort237,SKXPort0156]> { - let Latency = 3; - let NumMicroOps = 3; - let ReleaseAtCycles = [1,2]; -} -def: InstRW<[SKXWriteResGroup43], (instrs MFENCE)>; - def SKXWriteResGroup44 : SchedWriteRes<[SKXPort06,SKXPort0156]> { let Latency = 2; let NumMicroOps = 3; diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse1.s index 291b8cd..344a0d2 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse1.s @@ -298,7 +298,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 11 1.00 * rsqrtps (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 rsqrtss %xmm0, %xmm2 # CHECK-NEXT: 2 11 1.00 * rsqrtss (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 * * U sfence +# CHECK-NEXT: 2 1 1.00 * * U sfence # CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * shufps $1, (%rax), %xmm2 # CHECK-NEXT: 1 14 14.00 sqrtps %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s 
b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s index 25f79397..dffd6b2 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s @@ -464,13 +464,13 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 28 22.00 * divpd (%rax), %xmm2 # CHECK-NEXT: 1 22 22.00 divsd %xmm0, %xmm2 # CHECK-NEXT: 2 28 22.00 * divsd (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 * * U lfence +# CHECK-NEXT: 2 1 1.00 * * U lfence # CHECK-NEXT: 1 1 1.00 * * U maskmovdqu %xmm0, %xmm1 # CHECK-NEXT: 1 3 1.00 maxpd %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * maxpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 maxsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * maxsd (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 * * U mfence +# CHECK-NEXT: 2 1 1.00 * * U mfence # CHECK-NEXT: 1 3 1.00 minpd %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * minpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 minsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse1.s index c7b5864..3b3bb6d5 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse1.s @@ -298,7 +298,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 10 1.00 * rsqrtps (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 rsqrtss %xmm0, %xmm2 # CHECK-NEXT: 2 10 1.00 * rsqrtss (%rax), %xmm2 -# CHECK-NEXT: 2 2 0.33 * * U sfence +# CHECK-NEXT: 2 1 1.00 * * U sfence # CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2 # CHECK-NEXT: 2 6 1.00 * shufps $1, (%rax), %xmm2 # CHECK-NEXT: 1 11 7.00 sqrtps %xmm0, %xmm2 @@ -333,7 +333,7 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 43.00 34.50 50.50 32.00 32.00 8.00 40.50 0.50 3.00 +# CHECK-NEXT: - 43.00 34.25 50.25 32.17 32.17 9.00 40.25 0.25 2.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -441,7 +441,7 @@ xorps (%rax), %xmm2 # 
CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - rsqrtps (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - rsqrtss %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - rsqrtss (%rax), %xmm2 -# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 - 0.25 0.25 0.33 sfence +# CHECK-NEXT: - - - - 0.50 0.50 1.00 - - - sfence # CHECK-NEXT: - - - - - - - 1.00 - - shufps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - shufps $1, (%rax), %xmm2 # CHECK-NEXT: - 7.00 1.00 - - - - - - - sqrtps %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s index 8851be4..9c24726 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s @@ -470,7 +470,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 8 1.00 * maxpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 maxsd %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * maxsd (%rax), %xmm2 -# CHECK-NEXT: 2 2 0.50 * * U mfence +# CHECK-NEXT: 2 1 1.00 * * U mfence # CHECK-NEXT: 1 3 1.00 minpd %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * minpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 minsd %xmm0, %xmm2 @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 78.00 70.75 95.75 63.17 63.17 14.00 117.25 2.25 4.67 +# CHECK-NEXT: - 78.00 70.25 95.25 63.67 63.67 15.00 116.75 1.75 4.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -756,7 +756,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - maxpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - maxsd %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - maxsd (%rax), %xmm2 -# CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - mfence +# CHECK-NEXT: - - - - 0.50 0.50 1.00 - - - mfence # CHECK-NEXT: - - - 1.00 - - - - - - minpd %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - minpd (%rax), %xmm2 # CHECK-NEXT: - - - 
1.00 - - - - - - minsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse1.s index 291b8cd..344a0d2 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse1.s @@ -298,7 +298,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 11 1.00 * rsqrtps (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 rsqrtss %xmm0, %xmm2 # CHECK-NEXT: 2 11 1.00 * rsqrtss (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 * * U sfence +# CHECK-NEXT: 2 1 1.00 * * U sfence # CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * shufps $1, (%rax), %xmm2 # CHECK-NEXT: 1 14 14.00 sqrtps %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s index 25f79397..dffd6b2 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s @@ -464,13 +464,13 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 28 22.00 * divpd (%rax), %xmm2 # CHECK-NEXT: 1 22 22.00 divsd %xmm0, %xmm2 # CHECK-NEXT: 2 28 22.00 * divsd (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 * * U lfence +# CHECK-NEXT: 2 1 1.00 * * U lfence # CHECK-NEXT: 1 1 1.00 * * U maskmovdqu %xmm0, %xmm1 # CHECK-NEXT: 1 3 1.00 maxpd %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * maxpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 maxsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * maxsd (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 * * U mfence +# CHECK-NEXT: 2 1 1.00 * * U mfence # CHECK-NEXT: 1 3 1.00 minpd %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * minpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 minsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s index 37a28a6..c840782 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s @@ -298,7 +298,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 11 1.00 
* rsqrtps (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 rsqrtss %xmm0, %xmm2 # CHECK-NEXT: 2 10 1.00 * rsqrtss (%rax), %xmm2 -# CHECK-NEXT: 2 2 0.33 * * U sfence +# CHECK-NEXT: 2 1 1.00 * * U sfence # CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * shufps $1, (%rax), %xmm2 # CHECK-NEXT: 1 11 7.00 sqrtps %xmm0, %xmm2 @@ -333,7 +333,7 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 56.00 34.50 50.50 32.00 32.00 8.00 40.50 0.50 3.00 +# CHECK-NEXT: - 56.00 34.25 50.25 32.17 32.17 9.00 40.25 0.25 2.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -441,7 +441,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - rsqrtps (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - rsqrtss %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - rsqrtss (%rax), %xmm2 -# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 - 0.25 0.25 0.33 sfence +# CHECK-NEXT: - - - - 0.50 0.50 1.00 - - - sfence # CHECK-NEXT: - - - - - - - 1.00 - - shufps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - shufps $1, (%rax), %xmm2 # CHECK-NEXT: - 7.00 1.00 - - - - - - - sqrtps %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s index c9c3e20..4a41a79 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s @@ -470,7 +470,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * maxpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 maxsd %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * maxsd (%rax), %xmm2 -# CHECK-NEXT: 2 2 0.50 * * U mfence +# CHECK-NEXT: 2 1 1.00 * * U mfence # CHECK-NEXT: 1 3 1.00 minpd %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * minpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 minsd %xmm0, %xmm2 @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] 
[2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 112.00 70.75 95.75 63.17 63.17 14.00 117.25 2.25 4.67 +# CHECK-NEXT: - 112.00 70.25 95.25 63.67 63.67 15.00 116.75 1.75 4.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -756,7 +756,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - maxpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - maxsd %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - maxsd (%rax), %xmm2 -# CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - mfence +# CHECK-NEXT: - - - - 0.50 0.50 1.00 - - - mfence # CHECK-NEXT: - - - 1.00 - - - - - - minpd %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - minpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - minsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse1.s index 01e5794..840f325 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse1.s @@ -298,7 +298,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 10 1.00 * rsqrtps (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 rsqrtss %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * rsqrtss (%rax), %xmm2 -# CHECK-NEXT: 2 2 0.50 * * U sfence +# CHECK-NEXT: 2 1 0.50 * * U sfence # CHECK-NEXT: 1 1 0.50 shufps $1, %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * shufps $1, (%rax), %xmm2 # CHECK-NEXT: 1 12 3.00 sqrtps %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse2.s index 88d1882..6f60cfd 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse2.s @@ -470,7 +470,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 10 0.50 * maxpd (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 maxsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * maxsd (%rax), %xmm2 -# CHECK-NEXT: 3 3 1.00 * * U mfence +# CHECK-NEXT: 
2 1 0.50 * * U mfence # CHECK-NEXT: 1 4 0.50 minpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * minpd (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 minsd %xmm0, %xmm2 @@ -691,7 +691,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 40.00 106.92 103.42 58.50 58.50 7.50 71.92 1.75 8.00 8.00 7.50 +# CHECK-NEXT: - 40.00 106.92 103.42 58.50 58.50 7.50 71.92 1.75 7.50 7.50 7.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -758,7 +758,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - maxpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - maxsd %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - maxsd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - 0.50 - - 1.00 1.00 0.50 mfence +# CHECK-NEXT: - - - - - - 0.50 - - 0.50 0.50 0.50 mfence # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - minpd %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - minpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - minsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s index 02b5810..02bfa44 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s @@ -298,7 +298,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 11 1.00 * rsqrtps (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 rsqrtss %xmm0, %xmm2 # CHECK-NEXT: 2 11 1.00 * rsqrtss (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 * * U sfence +# CHECK-NEXT: 2 1 1.00 * * U sfence # CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * shufps $1, (%rax), %xmm2 # CHECK-NEXT: 1 14 14.00 sqrtps %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s index ff0f22b..7d4332f 100644 --- 
a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s @@ -464,13 +464,13 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 28 22.00 * divpd (%rax), %xmm2 # CHECK-NEXT: 1 22 22.00 divsd %xmm0, %xmm2 # CHECK-NEXT: 2 28 22.00 * divsd (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 * * U lfence +# CHECK-NEXT: 2 1 1.00 * * U lfence # CHECK-NEXT: 1 1 1.00 * * U maskmovdqu %xmm0, %xmm1 # CHECK-NEXT: 1 3 1.00 maxpd %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * maxpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 maxsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * maxsd (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 * * U mfence +# CHECK-NEXT: 2 1 1.00 * * U mfence # CHECK-NEXT: 1 3 1.00 minpd %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * minpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 minsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s index 142006c..a3f3133 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s @@ -298,7 +298,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 10 1.00 * rsqrtps (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 rsqrtss %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * rsqrtss (%rax), %xmm2 -# CHECK-NEXT: 2 2 0.33 * * U sfence +# CHECK-NEXT: 2 1 1.00 * * U sfence # CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * shufps $1, (%rax), %xmm2 # CHECK-NEXT: 1 12 3.00 sqrtps %xmm0, %xmm2 @@ -333,7 +333,7 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 24.00 73.33 23.33 32.00 32.00 8.00 31.83 0.50 3.00 +# CHECK-NEXT: - 24.00 73.08 23.08 32.17 32.17 9.00 31.58 0.25 2.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -441,7 +441,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - rsqrtps (%rax), %xmm2 # 
CHECK-NEXT: - - 1.00 - - - - - - - rsqrtss %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - rsqrtss (%rax), %xmm2 -# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 - 0.25 0.25 0.33 sfence +# CHECK-NEXT: - - - - 0.50 0.50 1.00 - - - sfence # CHECK-NEXT: - - - - - - - 1.00 - - shufps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - shufps $1, (%rax), %xmm2 # CHECK-NEXT: - 3.00 1.00 - - - - - - - sqrtps %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s index b527c1c..f7b5d42 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s @@ -470,7 +470,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 10 0.50 * maxpd (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 maxsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * maxsd (%rax), %xmm2 -# CHECK-NEXT: 3 3 0.50 * * U mfence +# CHECK-NEXT: 2 1 1.00 * * U mfence # CHECK-NEXT: 1 4 0.50 minpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * minpd (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 minsd %xmm0, %xmm2 @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 40.00 111.58 81.58 63.50 63.50 14.00 91.58 2.25 5.00 +# CHECK-NEXT: - 40.00 111.08 81.08 63.67 63.67 15.00 91.08 1.75 4.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -756,7 +756,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - maxsd %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxsd (%rax), %xmm2 -# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 - 0.50 0.50 0.33 mfence +# CHECK-NEXT: - - - - 0.50 0.50 1.00 - - - mfence # CHECK-NEXT: - - 0.50 0.50 - - - - - - minpd %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - 
minsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s index 3d20946..671f0d1 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s @@ -298,7 +298,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 10 1.00 * rsqrtps (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 rsqrtss %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * rsqrtss (%rax), %xmm2 -# CHECK-NEXT: 2 2 0.33 * * U sfence +# CHECK-NEXT: 2 1 1.00 * * U sfence # CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * shufps $1, (%rax), %xmm2 # CHECK-NEXT: 1 12 3.00 sqrtps %xmm0, %xmm2 @@ -333,7 +333,7 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 24.00 66.83 27.83 32.00 32.00 8.00 33.83 0.50 3.00 +# CHECK-NEXT: - 24.00 66.58 27.58 32.17 32.17 9.00 33.58 0.25 2.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -441,7 +441,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - rsqrtps (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - rsqrtss %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - rsqrtss (%rax), %xmm2 -# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 - 0.25 0.25 0.33 sfence +# CHECK-NEXT: - - - - 0.50 0.50 1.00 - - - sfence # CHECK-NEXT: - - - - - - - 1.00 - - shufps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - shufps $1, (%rax), %xmm2 # CHECK-NEXT: - 3.00 1.00 - - - - - - - sqrtps %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s index 3a48725..9a41c1b 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s @@ -470,7 +470,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 10 0.50 * maxpd 
(%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 maxsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * maxsd (%rax), %xmm2 -# CHECK-NEXT: 3 3 0.50 * * U mfence +# CHECK-NEXT: 2 1 1.00 * * U mfence # CHECK-NEXT: 1 4 0.50 minpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * minpd (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 minsd %xmm0, %xmm2 @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 40.00 107.08 86.08 63.50 63.50 14.00 90.58 2.25 5.00 +# CHECK-NEXT: - 40.00 106.58 85.58 63.67 63.67 15.00 90.08 1.75 4.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -756,7 +756,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - maxsd %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxsd (%rax), %xmm2 -# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 - 0.50 0.50 0.33 mfence +# CHECK-NEXT: - - - - 0.50 0.50 1.00 - - - mfence # CHECK-NEXT: - - 0.50 0.50 - - - - - - minpd %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - minsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/barrier_output.s b/llvm/test/tools/llvm-mca/X86/barrier_output.s index dddfa04..7eac346 100644 --- a/llvm/test/tools/llvm-mca/X86/barrier_output.s +++ b/llvm/test/tools/llvm-mca/X86/barrier_output.s @@ -19,7 +19,7 @@ maskmovdqu %xmm0, %xmm1 # CHECK: [1] [2] [3] [4] [5] [6] [7] [8] Instructions: # CHECK-NEXT: 4 5 1.00 * * U clflush (%rax) -# CHECK-NEXT: 1 1 1.00 * * U * lfence -# CHECK-NEXT: 1 1 1.00 * * U * * mfence -# CHECK-NEXT: 1 1 1.00 * * U * sfence +# CHECK-NEXT: 2 1 1.00 * * U * lfence +# CHECK-NEXT: 2 1 1.00 * * U * * mfence +# CHECK-NEXT: 2 1 1.00 * * U * sfence # CHECK-NEXT: 1 1 1.00 * * U maskmovdqu %xmm0, %xmm1 |