aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2022-10-29 12:03:38 +0100
committerSimon Pilgrim <llvm-dev@redking.me.uk>2022-10-29 12:03:43 +0100
commiteea6a2782e852ee38a56af8245a27d864b56b592 (patch)
tree5157d3d9d940b4bbd645b252c7b880f2ffc2f6d5 /llvm/lib
parent0a0d2f540076d9fee1ee722b5f47cc31be9fa53e (diff)
downloadllvm-eea6a2782e852ee38a56af8245a27d864b56b592.zip
llvm-eea6a2782e852ee38a56af8245a27d864b56b592.tar.gz
llvm-eea6a2782e852ee38a56af8245a27d864b56b592.tar.bz2
[X86] WriteFShuffle256 shuffles aren't microcoded in the llvm sense
znver1/2 might have poor throughput for crosslane shuffles, but they don't consume 100 cycles of resources. I think there was a misunderstanding between the AMD definition of microcoding (more than 2-3 uops) and the LLVM definition (here be dragons - impossible to approximately model the instruction). This is more yak shaving to come from D103695 - this time working out why codegen involving broadcasts gives such weird numbers.
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver1.td7
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver2.td7
2 files changed, 4 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 79beb3e..01deab3 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -372,6 +372,8 @@ defm : ZnWriteResFpuPair<WriteFSqrt64X, [ZnFPU3], 20, [8]>;
defm : ZnWriteResFpuPair<WriteFSqrt64Y, [ZnFPU3], 20, [16], 1>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : ZnWriteResFpuPair<WriteFSqrt80, [ZnFPU3], 20, [20]>;
+defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 2>;
+defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU], 2>;
// Vector integer operations which uses FPU units
defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
@@ -479,11 +481,6 @@ defm : ZnWriteResFpuPair<WriteAESKeyGen, [ZnFPU01], 4>;
def : WriteRes<WriteFence, [ZnAGU]>;
def : WriteRes<WriteNop, []>;
-// Following instructions with latency=100 are microcoded.
-// We set long latency so as to block the entire pipeline.
-defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 100>;
-defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU], 100>;
-
// Microcoded Instructions
def ZnWriteMicrocoded : SchedWriteRes<[]> {
let Latency = 100;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index b4f72a96..788b71e 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -371,6 +371,8 @@ defm : Zn2WriteResFpuPair<WriteFSqrt64X, [Zn2FPU3], 20, [10]>;
defm : Zn2WriteResFpuPair<WriteFSqrt64Y, [Zn2FPU3], 20, [10]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : Zn2WriteResFpuPair<WriteFSqrt80, [Zn2FPU3], 20, [20]>;
+defm : Zn2WriteResFpuPair<WriteFShuffle256, [Zn2FPU], 2>;
+defm : Zn2WriteResFpuPair<WriteFVarShuffle256, [Zn2FPU], 2>;
// Vector integer operations which uses FPU units
defm : X86WriteRes<WriteVecLoad, [Zn2AGU], 8, [1], 1>;
@@ -478,11 +480,6 @@ defm : Zn2WriteResFpuPair<WriteAESKeyGen, [Zn2FPU01], 4>;
def : WriteRes<WriteFence, [Zn2AGU]>;
def : WriteRes<WriteNop, []>;
-// Following instructions with latency=100 are microcoded.
-// We set long latency so as to block the entire pipeline.
-defm : Zn2WriteResFpuPair<WriteFShuffle256, [Zn2FPU], 100>;
-defm : Zn2WriteResFpuPair<WriteFVarShuffle256, [Zn2FPU], 100>;
-
// Microcoded Instructions
def Zn2WriteMicrocoded : SchedWriteRes<[]> {
let Latency = 100;