aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2019-04-03 23:28:30 +0000
committerCraig Topper <craig.topper@intel.com>2019-04-03 23:28:30 +0000
commit52cac4b79f97b1793ad06e503a5a62a24e06859c (patch)
tree4ba1913eaa4322cf1bc5097dd0cc4e9daeeca50d
parent477008bd5034a54cb7bddd0e635c19fdffaa524c (diff)
downloadllvm-52cac4b79f97b1793ad06e503a5a62a24e06859c.zip
llvm-52cac4b79f97b1793ad06e503a5a62a24e06859c.tar.gz
llvm-52cac4b79f97b1793ad06e503a5a62a24e06859c.tar.bz2
[X86] Remove CustomInserter pseudos for MONITOR/MONITORX/CLZERO. Use custom instruction selection instead.
This custom inserter existed so we could do a weird thing where we pretended that the instructions support a full address mode instead of taking a pointer in EAX/RAX. I think this was largely so we could be pointer size agnostic in the isel pattern. To make this work we would then put the address into an LEA into EAX/RAX in front of the instruction after isel. But the LEA is overkill when we just have a base pointer. So we end up using the LEA as a slower MOV instruction. With this change we now just do custom selection during isel instead and just assign the incoming address of the intrinsic into EAX/RAX based on its size. After the intrinsic is selected, we can let isel take care of selecting an LEA or other operation to do any address computation needed in this basic block. I've also split the instruction into a 32-bit mode version and a 64-bit mode version so the implicit use is properly sized based on the pointer. Without this we get comments in the assembly output about killing eax and defing rax or vice versa depending on whether we define the instruction to use EAX/RAX. llvm-svn: 357652
-rw-r--r--llvm/lib/Target/X86/X86ISelDAGToDAG.cpp55
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp52
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td36
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td17
-rw-r--r--llvm/lib/Target/X86/X86ScheduleAtom.td2
-rw-r--r--llvm/test/CodeGen/X86/apm.ll9
-rw-r--r--llvm/test/CodeGen/X86/clzero.ll3
-rw-r--r--llvm/test/CodeGen/X86/mwaitx.ll4
-rw-r--r--llvm/test/CodeGen/X86/sse3-intrinsics-x86.ll7
9 files changed, 88 insertions, 97 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 8d18a1f..272be22 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -3420,6 +3420,61 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
default: break;
+ case ISD::INTRINSIC_VOID: {
+ unsigned IntNo = Node->getConstantOperandVal(1);
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_sse3_monitor:
+ case Intrinsic::x86_monitorx:
+ case Intrinsic::x86_clzero: {
+ bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
+
+ unsigned Opc = 0;
+ switch (IntNo) {
+ case Intrinsic::x86_sse3_monitor:
+ if (!Subtarget->hasSSE3())
+ break;
+ Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
+ break;
+ case Intrinsic::x86_monitorx:
+ if (!Subtarget->hasMWAITX())
+ break;
+ Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
+ break;
+ case Intrinsic::x86_clzero:
+ if (!Subtarget->hasCLZERO())
+ break;
+ Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
+ break;
+ }
+
+ if (Opc) {
+ unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
+ SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
+ Node->getOperand(2), SDValue());
+ SDValue InFlag = Chain.getValue(1);
+
+ if (IntNo == Intrinsic::x86_sse3_monitor ||
+ IntNo == Intrinsic::x86_monitorx) {
+ // Copy the other two operands to ECX and EDX.
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
+ { Chain, InFlag});
+ ReplaceNode(Node, CNode);
+ return;
+ }
+ }
+ }
+
+ break;
+ }
case ISD::BRIND: {
if (Subtarget->isTargetNaCl())
// NaCl has its own pass where jmp %r32 are converted to jmp %r64. We
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a05fd55..99cf132 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28306,49 +28306,6 @@ static MachineBasicBlock *emitRDPKRU(MachineInstr &MI, MachineBasicBlock *BB,
return BB;
}
-static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB,
- const X86Subtarget &Subtarget,
- unsigned Opc) {
- DebugLoc dl = MI.getDebugLoc();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- // Address into RAX/EAX, other two args into ECX, EDX.
- unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
- unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
- for (int i = 0; i < X86::AddrNumOperands; ++i)
- MIB.add(MI.getOperand(i));
-
- unsigned ValOps = X86::AddrNumOperands;
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
- .addReg(MI.getOperand(ValOps).getReg());
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
- .addReg(MI.getOperand(ValOps + 1).getReg());
-
- // The instruction doesn't actually take any operands though.
- BuildMI(*BB, MI, dl, TII->get(Opc));
-
- MI.eraseFromParent(); // The pseudo is gone now.
- return BB;
-}
-
-static MachineBasicBlock *emitClzero(MachineInstr *MI, MachineBasicBlock *BB,
- const X86Subtarget &Subtarget) {
- DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- // Address into RAX/EAX
- unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
- unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
- for (int i = 0; i < X86::AddrNumOperands; ++i)
- MIB.add(MI->getOperand(i));
-
- // The instruction doesn't actually take any operands though.
- BuildMI(*BB, MI, dl, TII->get(X86::CLZEROr));
-
- MI->eraseFromParent(); // The pseudo is gone now.
- return BB;
-}
-
MachineBasicBlock *
@@ -30460,15 +30417,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
- // Thread synchronization.
- case X86::MONITOR:
- return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr);
- case X86::MONITORX:
- return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr);
-
- // Cache line zero
- case X86::CLZERO:
- return emitClzero(&MI, BB, Subtarget);
// PKU feature
case X86::WRPKRU:
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index f9e7290..e399f8b 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -2647,16 +2647,12 @@ defm LWPVAL64 : lwpval_intr<GR64, int_x86_lwpval64>, VEX_W;
// MONITORX/MWAITX Instructions
//
let SchedRW = [ WriteSystem ] in {
- let usesCustomInserter = 1 in {
- def MONITORX : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
- [(int_x86_monitorx addr:$src1, GR32:$src2, GR32:$src3)]>,
- Requires<[ HasMWAITX ]>;
- }
-
- let Uses = [ EAX, ECX, EDX ] in {
- def MONITORXrrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>,
- TB, Requires<[ HasMWAITX ]>;
- }
+ let Uses = [ EAX, ECX, EDX ] in
+ def MONITORX32rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>,
+ TB, Requires<[ HasMWAITX, Not64BitMode ]>;
+ let Uses = [ RAX, ECX, EDX ] in
+ def MONITORX64rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>,
+ TB, Requires<[ HasMWAITX, In64BitMode ]>;
let Uses = [ ECX, EAX, EBX ] in {
def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx",
@@ -2670,9 +2666,9 @@ def : InstAlias<"mwaitx\t{%eax, %ecx, %ebx|ebx, ecx, eax}", (MWAITXrrr)>,
def : InstAlias<"mwaitx\t{%rax, %rcx, %rbx|rbx, rcx, rax}", (MWAITXrrr)>,
Requires<[ In64BitMode ]>;
-def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORXrrr)>,
+def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORX32rrr)>,
Requires<[ Not64BitMode ]>;
-def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>,
+def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORX64rrr)>,
Requires<[ In64BitMode ]>;
//===----------------------------------------------------------------------===//
@@ -2736,17 +2732,15 @@ def MOVDIR64B64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
//
let SchedRW = [WriteSystem] in {
let Uses = [EAX] in
- def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
- TB, Requires<[HasCLZERO]>;
-
- let usesCustomInserter = 1 in {
- def CLZERO : PseudoI<(outs), (ins i32mem:$src1),
- [(int_x86_clzero addr:$src1)]>, Requires<[HasCLZERO]>;
- }
+ def CLZERO32r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
+ TB, Requires<[HasCLZERO, Not64BitMode]>;
+ let Uses = [RAX] in
+ def CLZERO64r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
+ TB, Requires<[HasCLZERO, In64BitMode]>;
} // SchedRW
-def : InstAlias<"clzero\t{%eax|eax}", (CLZEROr)>, Requires<[Not64BitMode]>;
-def : InstAlias<"clzero\t{%rax|rax}", (CLZEROr)>, Requires<[In64BitMode]>;
+def : InstAlias<"clzero\t{%eax|eax}", (CLZERO32r)>, Requires<[Not64BitMode]>;
+def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// Pattern fragments to auto generate TBM instructions.
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c3f471e..5bc9b3e 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5044,15 +5044,12 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
//===---------------------------------------------------------------------===//
let SchedRW = [WriteSystem] in {
-let usesCustomInserter = 1 in {
-def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
- [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
- Requires<[HasSSE3]>;
-}
-
let Uses = [EAX, ECX, EDX] in
-def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
- TB, Requires<[HasSSE3]>;
+def MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
+ TB, Requires<[HasSSE3, Not64BitMode]>;
+let Uses = [RAX, ECX, EDX] in
+def MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
+ TB, Requires<[HasSSE3, In64BitMode]>;
let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
@@ -5062,9 +5059,9 @@ def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>;
def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>;
-def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORrrr)>,
+def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>,
Requires<[Not64BitMode]>;
-def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>,
+def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>,
Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index bf50aee..3ebd885 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -741,7 +741,7 @@ def AtomWrite01_45 : SchedWriteRes<[AtomPort01]> {
let Latency = 45;
let ResourceCycles = [45];
}
-def : InstRW<[AtomWrite01_45], (instrs MONITORrrr)>;
+def : InstRW<[AtomWrite01_45], (instrs MONITOR32rrr, MONITOR64rrr)>;
def AtomWrite01_46 : SchedWriteRes<[AtomPort01]> {
let Latency = 46;
diff --git a/llvm/test/CodeGen/X86/apm.ll b/llvm/test/CodeGen/X86/apm.ll
index 859f33a..38866c7 100644
--- a/llvm/test/CodeGen/X86/apm.ll
+++ b/llvm/test/CodeGen/X86/apm.ll
@@ -8,23 +8,22 @@
define void @foo(i8* %P, i32 %E, i32 %H) nounwind {
; X86-LABEL: foo:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (%eax), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: monitor
; X86-NEXT: retl
;
; X64-LABEL: foo:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: leaq (%rdi), %rax
+; X64-NEXT: movq %rdi, %rax
; X64-NEXT: monitor
; X64-NEXT: retq
;
; WIN64-LABEL: foo:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: leaq (%rcx), %rax
+; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: movl %edx, %ecx
; WIN64-NEXT: movl %r8d, %edx
; WIN64-NEXT: monitor
diff --git a/llvm/test/CodeGen/X86/clzero.ll b/llvm/test/CodeGen/X86/clzero.ll
index d08470d..a185cb7 100644
--- a/llvm/test/CodeGen/X86/clzero.ll
+++ b/llvm/test/CodeGen/X86/clzero.ll
@@ -5,14 +5,13 @@
define void @foo(i8* %p) #0 {
; X64-LABEL: foo:
; X64: # %bb.0: # %entry
-; X64-NEXT: leaq (%rdi), %rax
+; X64-NEXT: movq %rdi, %rax
; X64-NEXT: clzero
; X64-NEXT: retq
;
; X32-LABEL: foo:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: leal (%eax), %eax
; X32-NEXT: clzero
; X32-NEXT: retl
entry:
diff --git a/llvm/test/CodeGen/X86/mwaitx.ll b/llvm/test/CodeGen/X86/mwaitx.ll
index 4895297..202a360 100644
--- a/llvm/test/CodeGen/X86/mwaitx.ll
+++ b/llvm/test/CodeGen/X86/mwaitx.ll
@@ -8,13 +8,13 @@ define void @foo(i8* %P, i32 %E, i32 %H) nounwind {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %esi, %ecx
-; CHECK-NEXT: leaq (%rdi), %rax
+; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: monitorx
; CHECK-NEXT: retq
;
; WIN64-LABEL: foo:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: leaq (%rcx), %rax
+; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: movl %edx, %ecx
; WIN64-NEXT: movl %r8d, %edx
; WIN64-NEXT: monitorx
diff --git a/llvm/test/CodeGen/X86/sse3-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse3-intrinsics-x86.ll
index dfeb3cf..7124712 100644
--- a/llvm/test/CodeGen/X86/sse3-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse3-intrinsics-x86.ll
@@ -134,17 +134,16 @@ declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
; X86-LABEL: monitor:
; X86: ## %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x0c]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: leal (%eax), %eax ## encoding: [0x8d,0x00]
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x0c]
; X86-NEXT: monitor ## encoding: [0x0f,0x01,0xc8]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: monitor:
; X64: ## %bb.0:
; X64-NEXT: movl %esi, %ecx ## encoding: [0x89,0xf1]
-; X64-NEXT: leaq (%rdi), %rax ## encoding: [0x48,0x8d,0x07]
+; X64-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
; X64-NEXT: monitor ## encoding: [0x0f,0x01,0xc8]
; X64-NEXT: retq ## encoding: [0xc3]
tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)