diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 140 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrCompiler.td | 6 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 41 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/CodeExtractor.cpp | 11 |
7 files changed, 89 insertions, 116 deletions
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 526305d..892912a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -990,13 +990,13 @@ bool SPIRVInstructionSelector::selectMemOperation(Register ResVReg, Register VarReg = MRI->createGenericVirtualRegister(LLT::scalar(64)); GR.add(GV, GR.CurMF, VarReg); - buildOpDecorate(VarReg, I, TII, SPIRV::Decoration::Constant, {}); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVariable)) .addDef(VarReg) .addUse(GR.getSPIRVTypeID(VarTy)) .addImm(SPIRV::StorageClass::UniformConstant) .addUse(Const) .constrainAllUses(TII, TRI, RBI); + buildOpDecorate(VarReg, I, TII, SPIRV::Decoration::Constant, {}); SPIRVType *SourceTy = GR.getOrCreateSPIRVPointerType( ValTy, I, TII, SPIRV::StorageClass::UniformConstant); SrcReg = MRI->createGenericVirtualRegister(LLT::scalar(64)); diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index cc34cf8..790d86f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -829,7 +829,7 @@ static void insertSpirvDecorations(MachineFunction &MF, MachineIRBuilder MIB) { for (MachineInstr &MI : MBB) { if (!isSpvIntrinsic(MI, Intrinsic::spv_assign_decoration)) continue; - MIB.setInsertPt(*MI.getParent(), MI); + MIB.setInsertPt(*MI.getParent(), MI.getNextNode()); buildOpSpirvDecorations(MI.getOperand(1).getReg(), MIB, MI.getOperand(2).getMetadata()); ToErase.push_back(&MI); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0ae814d..1fd6b24 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18831,44 +18831,66 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false); } -static SDValue -GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, - SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg, - unsigned char OperandFlags, bool LocalDynamic = false) { +static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA, + const EVT PtrVT, unsigned ReturnReg, + unsigned char OperandFlags, + bool LoadGlobalBaseReg = false, + bool LocalDynamic = false) { MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDLoc dl(GA); SDValue TGA; bool UseTLSDESC = DAG.getTarget().useTLSDESC(); + SDValue Chain = DAG.getEntryNode(); + SDValue Ret; if (LocalDynamic && UseTLSDESC) { TGA = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, OperandFlags); auto UI = TGA->use_begin(); // Reuse existing GetTLSADDR node if we can find it. - if (UI != TGA->use_end()) - return SDValue(*UI->use_begin()->use_begin(), 0); + if (UI != TGA->use_end()) { + // TLSDESC uses TGA. + auto TLSDescOp = UI; + assert(TLSDescOp->getOpcode() == X86ISD::TLSDESC && + "Unexpected TLSDESC DAG"); + // CALLSEQ_END uses TGA via a chain and glue. + auto *CallSeqEndOp = TLSDescOp->getGluedUser(); + assert(CallSeqEndOp && CallSeqEndOp->getOpcode() == ISD::CALLSEQ_END && + "Unexpected TLSDESC DAG"); + // CopyFromReg uses CALLSEQ_END via a chain and glue. + auto *CopyFromRegOp = CallSeqEndOp->getGluedUser(); + assert(CopyFromRegOp && CopyFromRegOp->getOpcode() == ISD::CopyFromReg && + "Unexpected TLSDESC DAG"); + Ret = SDValue(CopyFromRegOp, 0); + } } else { TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); } - X86ISD::NodeType CallType = UseTLSDESC ? X86ISD::TLSDESC - : LocalDynamic ? X86ISD::TLSBASEADDR - : X86ISD::TLSADDR; + if (!Ret) { + X86ISD::NodeType CallType = UseTLSDESC ? X86ISD::TLSDESC + : LocalDynamic ? X86ISD::TLSBASEADDR + : X86ISD::TLSADDR; - if (InGlue) { - SDValue Ops[] = { Chain, TGA, *InGlue }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops); - } else { - SDValue Ops[] = { Chain, TGA }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops); - } + Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl); + if (LoadGlobalBaseReg) { + SDValue InGlue; + Chain = DAG.getCopyToReg(Chain, dl, X86::EBX, + DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), + InGlue); + InGlue = Chain.getValue(1); + Chain = DAG.getNode(CallType, dl, NodeTys, {Chain, TGA, InGlue}); + } else { + Chain = DAG.getNode(CallType, dl, NodeTys, {Chain, TGA}); + } + Chain = DAG.getCALLSEQ_END(Chain, 0, 0, Chain.getValue(1), dl); - // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. - MFI.setAdjustsStack(true); - MFI.setHasCalls(true); + // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. + MFI.setHasCalls(true); - SDValue Glue = Chain.getValue(1); - SDValue Ret = DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue); + SDValue Glue = Chain.getValue(1); + Ret = DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue); + } if (!UseTLSDESC) return Ret; @@ -18887,30 +18909,22 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT) { - SDValue InGlue; - SDLoc dl(GA); // ? function entry point might be better - SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, - DAG.getNode(X86ISD::GlobalBaseReg, - SDLoc(), PtrVT), InGlue); - InGlue = Chain.getValue(1); - - return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD); + return GetTLSADDR(DAG, GA, PtrVT, X86::EAX, X86II::MO_TLSGD, + /*LoadGlobalBaseReg=*/true); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64 static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT) { - return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, - X86::RAX, X86II::MO_TLSGD); + return GetTLSADDR(DAG, GA, PtrVT, X86::RAX, X86II::MO_TLSGD); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32 static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT) { - return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, - X86::EAX, X86II::MO_TLSGD); + return GetTLSADDR(DAG, GA, PtrVT, X86::EAX, X86II::MO_TLSGD); } static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, @@ -18919,22 +18933,20 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SDLoc dl(GA); // Get the start address of the TLS block for this module. - X86MachineFunctionInfo *MFI = DAG.getMachineFunction() - .getInfo<X86MachineFunctionInfo>(); + X86MachineFunctionInfo *MFI = + DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>(); MFI->incNumLocalDynamicTLSAccesses(); SDValue Base; if (Is64Bit) { unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX; - Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg, - X86II::MO_TLSLD, /*LocalDynamic=*/true); + Base = GetTLSADDR(DAG, GA, PtrVT, ReturnReg, X86II::MO_TLSLD, + /*LoadGlobalBaseReg=*/false, + /*LocalDynamic=*/true); } else { - SDValue InGlue; - SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, - DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue); - InGlue = Chain.getValue(1); - Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, - X86II::MO_TLSLDM, /*LocalDynamic=*/true); + Base = GetTLSADDR(DAG, GA, PtrVT, X86::EAX, X86II::MO_TLSLDM, + /*LoadGlobalBaseReg=*/true, + /*LocalDynamic=*/true); } // Note: the CleanupLocalDynamicTLSPass will remove redundant computations @@ -36060,36 +36072,6 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI, } MachineBasicBlock * -X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI, - MachineBasicBlock *BB) const { - // So, here we replace TLSADDR with the sequence: - // adjust_stackdown -> TLSADDR -> adjust_stackup. - // We need this because TLSADDR is lowered into calls - // inside MC, therefore without the two markers shrink-wrapping - // may push the prologue/epilogue pass them. - const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - const MIMetadata MIMD(MI); - MachineFunction &MF = *BB->getParent(); - - // Emit CALLSEQ_START right before the instruction. - MF.getFrameInfo().setAdjustsStack(true); - unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); - MachineInstrBuilder CallseqStart = - BuildMI(MF, MIMD, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0); - BB->insert(MachineBasicBlock::iterator(MI), CallseqStart); - - // Emit CALLSEQ_END right after the instruction. - // We don't call erase from parent because we want to keep the - // original instruction around. - unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); - MachineInstrBuilder CallseqEnd = - BuildMI(MF, MIMD, TII.get(AdjStackUp)).addImm(0).addImm(0); - BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd); - - return BB; -} - -MachineBasicBlock * X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI, MachineBasicBlock *BB) const { // This is pretty easy. We're taking the value that we received from @@ -37091,16 +37073,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, return X86::TMM0_TMM1 + Imm / 2; }; switch (MI.getOpcode()) { - default: llvm_unreachable("Unexpected instr type to insert"); - case X86::TLS_addr32: - case X86::TLS_addr64: - case X86::TLS_addrX32: - case X86::TLS_base_addr32: - case X86::TLS_base_addr64: - case X86::TLS_base_addrX32: - case X86::TLS_desc32: - case X86::TLS_desc64: - return EmitLoweredTLSAddr(MI, BB); + default: + llvm_unreachable("Unexpected instr type to insert"); case X86::INDIRECT_THUNK_CALL32: case X86::INDIRECT_THUNK_CALL64: case X86::INDIRECT_THUNK_TCRETURN32: diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 14ada17..2db25d6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1848,9 +1848,6 @@ namespace llvm { MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI, MachineBasicBlock *BB) const; diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index a05c3f0..51cee2e 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -478,7 +478,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF], - usesCustomInserter = 1, Uses = [ESP, SSP] in { + Uses = [ESP, SSP] in { def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", [(X86tlsaddr tls32addr:$sym)]>, @@ -498,7 +498,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF], - usesCustomInserter = 1, Uses = [RSP, SSP] in { + Uses = [RSP, SSP] in { def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_addr64", [(X86tlsaddr tls64addr:$sym)]>, @@ -520,7 +520,7 @@ def TLS_base_addrX32 : I<0, Pseudo, (outs), (ins i32mem:$sym), // TLSDESC only clobbers EAX and EFLAGS. ESP is marked as a use to prevent // stack-pointer assignments that appear immediately before calls from // potentially appearing dead. -let Defs = [EAX, EFLAGS], usesCustomInserter = 1, Uses = [RSP, SSP] in { +let Defs = [EAX, EFLAGS], Uses = [RSP, SSP] in { def TLS_desc32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_desc32", [(X86tlsdesc tls32addr:$sym)]>; def TLS_desc64 : I<0, Pseudo, (outs), (ins i64mem:$sym), diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 1efec22..17d8283 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -472,16 +472,24 @@ Constant *InstCostVisitor::visitCastInst(CastInst &I) { Constant *InstCostVisitor::visitCmpInst(CmpInst &I) { assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - bool Swap = I.getOperand(1) == LastVisited->first; - Value *V = Swap ? I.getOperand(0) : I.getOperand(1); + Constant *Const = LastVisited->second; + bool ConstOnRHS = I.getOperand(1) == LastVisited->first; + Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1); Constant *Other = findConstantFor(V, KnownConstants); - if (!Other) - return nullptr; - Constant *Const = LastVisited->second; - return Swap ? - ConstantFoldCompareInstOperands(I.getPredicate(), Other, Const, DL) - : ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL); + if (Other) { + if (ConstOnRHS) + std::swap(Const, Other); + return ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL); + } + + // If we haven't found Other to be a specific constant value, we may still be + // able to constant fold using information from the lattice value. + const ValueLatticeElement &ConstLV = ValueLatticeElement::get(Const); + const ValueLatticeElement &OtherLV = Solver.getLatticeValueFor(V); + auto &V1State = ConstOnRHS ? OtherLV : ConstLV; + auto &V2State = ConstOnRHS ? ConstLV : OtherLV; + return V1State.getCompare(I.getPredicate(), I.getType(), V2State, DL); } Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) { @@ -493,16 +501,17 @@ Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) { Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) { assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - bool Swap = I.getOperand(1) == LastVisited->first; - Value *V = Swap ? I.getOperand(0) : I.getOperand(1); + bool ConstOnRHS = I.getOperand(1) == LastVisited->first; + Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1); Constant *Other = findConstantFor(V, KnownConstants); - if (!Other) - return nullptr; + Value *OtherVal = Other ? Other : V; + Value *ConstVal = LastVisited->second; - Constant *Const = LastVisited->second; - return dyn_cast_or_null<Constant>(Swap ? - simplifyBinOp(I.getOpcode(), Other, Const, SimplifyQuery(DL)) - : simplifyBinOp(I.getOpcode(), Const, Other, SimplifyQuery(DL))); + if (ConstOnRHS) + std::swap(ConstVal, OtherVal); + + return dyn_cast_or_null<Constant>( + simplifyBinOp(I.getOpcode(), ConstVal, OtherVal, SimplifyQuery(DL))); } Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca, diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 440912e..b6906ab 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -823,21 +823,16 @@ Function *CodeExtractor::constructFunctionDeclaration( // Assemble the function's parameter lists. std::vector<Type *> ParamTy; std::vector<Type *> AggParamTy; - std::vector<std::tuple<unsigned, Value *>> NumberedInputs; - std::vector<std::tuple<unsigned, Value *>> NumberedOutputs; const DataLayout &DL = M->getDataLayout(); // Add the types of the input values to the function's argument list - unsigned ArgNum = 0; for (Value *value : inputs) { LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); if (AggregateArgs && !ExcludeArgsFromAggregate.contains(value)) { AggParamTy.push_back(value->getType()); StructValues.insert(value); - } else { + } else ParamTy.push_back(value->getType()); - NumberedInputs.emplace_back(ArgNum++, value); - } } // Add the types of the output values to the function's argument list. @@ -846,11 +841,9 @@ Function *CodeExtractor::constructFunctionDeclaration( if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) { AggParamTy.push_back(output->getType()); StructValues.insert(output); - } else { + } else ParamTy.push_back( PointerType::get(output->getType(), DL.getAllocaAddrSpace())); - NumberedOutputs.emplace_back(ArgNum++, output); - } } assert( |