Diffstat (limited to 'llvm/lib')
31 files changed, 344 insertions, 189 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index dd98b62..c14cb9e 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1485,6 +1485,9 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
   switch (Opcode) {
   default:
     llvm_unreachable("Missing case");
+  case Instruction::PtrToAddr:
+    // TODO: Add some of the ptrtoint folds here as well.
+    break;
   case Instruction::PtrToInt:
     if (auto *CE = dyn_cast<ConstantExpr>(C)) {
       Constant *FoldedValue = nullptr;
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 1e70228..b0e4b00 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -9147,7 +9147,8 @@ static bool matchTwoInputRecurrence(const PHINode *PN, InstTy *&Inst,
     return false;
 
   for (unsigned I = 0; I != 2; ++I) {
-    if (auto *Operation = dyn_cast<InstTy>(PN->getIncomingValue(I))) {
+    if (auto *Operation = dyn_cast<InstTy>(PN->getIncomingValue(I));
+        Operation && Operation->getNumOperands() >= 2) {
       Value *LHS = Operation->getOperand(0);
       Value *RHS = Operation->getOperand(1);
       if (LHS != PN && RHS != PN)
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 520c6a0..3d5bd61 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -928,6 +928,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   INSTKEYWORD(fptoui, FPToUI);
   INSTKEYWORD(fptosi, FPToSI);
   INSTKEYWORD(inttoptr, IntToPtr);
+  INSTKEYWORD(ptrtoaddr, PtrToAddr);
   INSTKEYWORD(ptrtoint, PtrToInt);
   INSTKEYWORD(bitcast, BitCast);
   INSTKEYWORD(addrspacecast, AddrSpaceCast);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 13bef1f..1bc2906 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -4273,6 +4273,7 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
   case lltok::kw_bitcast:
   case lltok::kw_addrspacecast:
   case lltok::kw_inttoptr:
+  case lltok::kw_ptrtoaddr:
   case lltok::kw_ptrtoint: {
     unsigned Opc = Lex.getUIntVal();
     Type *DestTy = nullptr;
@@ -7310,6 +7311,7 @@ int LLParser::parseInstruction(Instruction *&Inst, BasicBlock *BB,
   case lltok::kw_fptoui:
   case lltok::kw_fptosi:
   case lltok::kw_inttoptr:
+  case lltok::kw_ptrtoaddr:
  case lltok::kw_ptrtoint:
     return parseCast(Inst, PFS, KeywordVal);
   case lltok::kw_fptrunc:
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 290d873..22a0d0f 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1283,6 +1283,7 @@ static int getDecodedCastOpcode(unsigned Val) {
   case bitc::CAST_SITOFP  : return Instruction::SIToFP;
   case bitc::CAST_FPTRUNC : return Instruction::FPTrunc;
   case bitc::CAST_FPEXT   : return Instruction::FPExt;
+  case bitc::CAST_PTRTOADDR: return Instruction::PtrToAddr;
   case bitc::CAST_PTRTOINT: return Instruction::PtrToInt;
   case bitc::CAST_INTTOPTR: return Instruction::IntToPtr;
   case bitc::CAST_BITCAST : return Instruction::BitCast;
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 05680fa..a3f8254 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -647,6 +647,7 @@ static unsigned getEncodedCastOpcode(unsigned Opcode) {
   case Instruction::SIToFP  : return bitc::CAST_SITOFP;
   case Instruction::FPTrunc : return bitc::CAST_FPTRUNC;
   case Instruction::FPExt   : return bitc::CAST_FPEXT;
+  case Instruction::PtrToAddr: return bitc::CAST_PTRTOADDR;
   case Instruction::PtrToInt: return bitc::CAST_PTRTOINT;
   case Instruction::IntToPtr: return bitc::CAST_INTTOPTR;
   case Instruction::BitCast : return bitc::CAST_BITCAST;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index c72b6e8..23a3543 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -3657,6 +3657,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV,
     break; // Error
   }
 
+  case Instruction::PtrToAddr:
   case Instruction::PtrToInt: {
     const DataLayout &DL = getDataLayout();
diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index ca51b67..5f37890 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -1001,7 +1001,7 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
       ++CritIdx;
     if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == i) {
-      int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].getUnitInc();
+      int PDiff = (int)PNew - CriticalPSets[CritIdx].getUnitInc();
       if (PDiff > 0) {
         Delta.CriticalMax = PressureChange(i);
         Delta.CriticalMax.setUnitInc(PDiff);
@@ -1191,7 +1191,7 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
       ++CritIdx;
     if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == PSetID) {
-      int CritInc = (int)MNew - (int)CriticalPSets[CritIdx].getUnitInc();
+      int CritInc = (int)MNew - CriticalPSets[CritIdx].getUnitInc();
       if (CritInc > 0 && CritInc <= std::numeric_limits<int16_t>::max()) {
         Delta.CriticalMax = PressureChange(PSetID);
         Delta.CriticalMax.setUnitInc(CritInc);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0d1e954..48ab797 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3977,6 +3977,11 @@ void SelectionDAGBuilder::visitSIToFP(const User &I) {
   setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
 }
 
+void SelectionDAGBuilder::visitPtrToAddr(const User &I) {
+  // FIXME: this is not correct for pointers with addr width != pointer width
+  visitPtrToInt(I);
+}
+
 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
   // What to do depends on the size of the integer and the size of the pointer.
   // We can either truncate, zero extend, or no-op, accordingly.
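
For illustration, a minimal sketch of exercising the new cast through the updated C++ API (the helper name is hypothetical; it leans on the CastInst::Create and castIsValid support added below in Instructions.cpp, and assumes AddrTy matches the address width required for Ptr's address space):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"

    // Emit a ptrtoaddr cast. CastInst::Create now accepts
    // Instruction::PtrToAddr; castIsValid() enforces a pointer (or pointer
    // vector) source, an integer destination, and matching element counts,
    // mirroring the PtrToInt rules.
    llvm::Value *emitPtrToAddr(llvm::IRBuilder<> &B, llvm::Value *Ptr,
                               llvm::Type *AddrTy) {
      return B.Insert(
          llvm::CastInst::Create(llvm::Instruction::PtrToAddr, Ptr, AddrTy));
    }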
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index c251755..e0835e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -574,6 +574,7 @@ private:
   void visitFPToSI(const User &I);
   void visitUIToFP(const User &I);
   void visitSIToFP(const User &I);
+  void visitPtrToAddr(const User &I);
   void visitPtrToInt(const User &I);
   void visitIntToPtr(const User &I);
   void visitBitCast(const User &I);
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index bf4c9f9..d80a229 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1893,6 +1893,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
   case SIToFP:        return ISD::SINT_TO_FP;
   case FPTrunc:       return ISD::FP_ROUND;
   case FPExt:         return ISD::FP_EXTEND;
+  case PtrToAddr:     return ISD::BITCAST;
   case PtrToInt:      return ISD::BITCAST;
   case IntToPtr:      return ISD::BITCAST;
   case BitCast:       return ISD::BITCAST;
diff --git a/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp b/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
index 9d84aa8..72308a3d 100644
--- a/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
+++ b/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
@@ -29,7 +29,7 @@ bool verifyRegisterValue(uint32_t RegisterValue) {
 // This Range is reserverved, therefore invalid, according to the spec
 // https://github.com/llvm/wg-hlsl/blob/main/proposals/0002-root-signature-in-clang.md#all-the-values-should-be-legal
 bool verifyRegisterSpace(uint32_t RegisterSpace) {
-  return !(RegisterSpace >= 0xFFFFFFF0 && RegisterSpace <= 0xFFFFFFFF);
+  return !(RegisterSpace >= 0xFFFFFFF0);
 }
 
 bool verifyRootDescriptorFlag(uint32_t Version, uint32_t FlagsVal) {
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index d4ad21e..6b202ba 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -254,6 +254,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
     return FoldBitCast(V, DestTy);
   case Instruction::AddrSpaceCast:
   case Instruction::IntToPtr:
+  case Instruction::PtrToAddr:
   case Instruction::PtrToInt:
     return nullptr;
   }
diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp
index e09c139..2fcdbcc6 100644
--- a/llvm/lib/IR/ConstantRange.cpp
+++ b/llvm/lib/IR/ConstantRange.cpp
@@ -829,6 +829,7 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
   case Instruction::FPTrunc:
   case Instruction::FPExt:
   case Instruction::IntToPtr:
+  case Instruction::PtrToAddr:
   case Instruction::PtrToInt:
   case Instruction::AddrSpaceCast:
     // Conservatively return getFull set.
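
The verifyRegisterSpace change drops a tautological bound: RegisterSpace is a uint32_t, so RegisterSpace <= 0xFFFFFFFF holds for every input and the two predicates are equivalent. A quick standalone check (hypothetical test, not part of the patch):

    #include <cassert>
    #include <cstdint>

    static bool oldCheck(uint32_t S) {
      return !(S >= 0xFFFFFFF0u && S <= 0xFFFFFFFFu); // upper bound is vacuous
    }
    static bool newCheck(uint32_t S) { return !(S >= 0xFFFFFFF0u); }

    int main() {
      // Sample the edges of the reserved range [0xFFFFFFF0, 0xFFFFFFFF].
      for (uint32_t S : {0u, 0xFFFFFFEFu, 0xFFFFFFF0u, 0xFFFFFFFFu})
        assert(oldCheck(S) == newCheck(S));
    }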
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index a3c725b..c7e3113a 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -1567,6 +1567,7 @@ Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
   case Instruction::SIToFP:
   case Instruction::FPToUI:
   case Instruction::FPToSI:
+  case Instruction::PtrToAddr:
   case Instruction::PtrToInt:
   case Instruction::IntToPtr:
   case Instruction::BitCast:
@@ -2223,6 +2224,8 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty,
     llvm_unreachable("Invalid cast opcode");
   case Instruction::Trunc:
     return getTrunc(C, Ty, OnlyIfReduced);
+  case Instruction::PtrToAddr:
+    return getPtrToAddr(C, Ty, OnlyIfReduced);
   case Instruction::PtrToInt:
     return getPtrToInt(C, Ty, OnlyIfReduced);
   case Instruction::IntToPtr:
@@ -2280,6 +2283,20 @@ Constant *ConstantExpr::getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced) {
   return getFoldedCast(Instruction::Trunc, C, Ty, OnlyIfReduced);
 }
 
+Constant *ConstantExpr::getPtrToAddr(Constant *C, Type *DstTy,
+                                     bool OnlyIfReduced) {
+  assert(C->getType()->isPtrOrPtrVectorTy() &&
+         "PtrToAddr source must be pointer or pointer vector");
+  assert(DstTy->isIntOrIntVectorTy() &&
+         "PtrToAddr destination must be integer or integer vector");
+  assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
+  if (isa<VectorType>(C->getType()))
+    assert(cast<VectorType>(C->getType())->getElementCount() ==
+               cast<VectorType>(DstTy)->getElementCount() &&
+           "Invalid cast between a different number of vector elements");
+  return getFoldedCast(Instruction::PtrToAddr, C, DstTy, OnlyIfReduced);
+}
+
 Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy,
                                     bool OnlyIfReduced) {
   assert(C->getType()->isPtrOrPtrVectorTy() &&
@@ -2435,6 +2452,7 @@ bool ConstantExpr::isDesirableCastOp(unsigned Opcode) {
   case Instruction::FPToSI:
     return false;
   case Instruction::Trunc:
+  case Instruction::PtrToAddr:
   case Instruction::PtrToInt:
   case Instruction::IntToPtr:
   case Instruction::BitCast:
@@ -2457,6 +2475,7 @@ bool ConstantExpr::isSupportedCastOp(unsigned Opcode) {
   case Instruction::FPToSI:
     return false;
   case Instruction::Trunc:
+  case Instruction::PtrToAddr:
   case Instruction::PtrToInt:
   case Instruction::IntToPtr:
   case Instruction::BitCast:
@@ -3401,6 +3420,7 @@ Instruction *ConstantExpr::getAsInstruction() const {
   switch (getOpcode()) {
   case Instruction::Trunc:
+  case Instruction::PtrToAddr:
   case Instruction::PtrToInt:
   case Instruction::IntToPtr:
   case Instruction::BitCast:
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index f1d4549..96065ed 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -57,15 +57,9 @@ DebugVariable::DebugVariable(const DbgVariableRecord *DVR)
 DILocation::DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
                        unsigned Column, uint64_t AtomGroup, uint8_t AtomRank,
                        ArrayRef<Metadata *> MDs, bool ImplicitCode)
-    : MDNode(C, DILocationKind, Storage, MDs)
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
-      ,
-      AtomGroup(AtomGroup), AtomRank(AtomRank)
-#endif
-{
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
+    : MDNode(C, DILocationKind, Storage, MDs), AtomGroup(AtomGroup),
+      AtomRank(AtomRank) {
   assert(AtomRank <= 7 && "AtomRank number should fit in 3 bits");
-#endif
 
   if (AtomGroup)
     C.updateDILocationAtomGroupWaterline(AtomGroup + 1);
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index 7b799c7..11d33e2 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -404,6 +404,7 @@ findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases,
       return findBaseObject(CE->getOperand(0), Aliases, Op);
     }
     case Instruction::IntToPtr:
+    case Instruction::PtrToAddr:
     case Instruction::PtrToInt:
     case Instruction::BitCast:
     case Instruction::GetElementPtr:
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index b7cd12a..4540268 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -817,6 +817,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
   case UIToFP:        return "uitofp";
   case SIToFP:        return "sitofp";
   case IntToPtr:      return "inttoptr";
+  case PtrToAddr:     return "ptrtoaddr";
   case PtrToInt:      return "ptrtoint";
   case BitCast:       return "bitcast";
   case AddrSpaceCast: return "addrspacecast";
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index b896382..a1751c0 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -2798,6 +2798,7 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode,
       return false;
     case Instruction::BitCast:
       return true; // BitCast never modifies bits.
+    case Instruction::PtrToAddr:
    case Instruction::PtrToInt:
       return DL.getIntPtrType(SrcTy)->getScalarSizeInBits() ==
              DestTy->getScalarSizeInBits();
@@ -2855,26 +2856,29 @@ unsigned CastInst::isEliminableCastPair(
   // same reason.
   const unsigned numCastOps =
       Instruction::CastOpsEnd - Instruction::CastOpsBegin;
+  // clang-format off
   static const uint8_t CastResults[numCastOps][numCastOps] = {
-    // T        F  F  U  S  F  F  P  I  B  A  -+
-    // R  Z  S  P  P  I  I  T  P  2  N  T  S   |
-    // U  E  E  2  2  2  2  R  E  I  T  C  C  +- secondOp
-    // N  X  X  U  S  F  F  N  X  N  2  V  V   |
-    // C  T  T  I  I  P  P  C  T  T  P  T  T  -+
-    {  1, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // Trunc         -+
-    {  8, 1, 9,99,99, 2,17,99,99,99, 2, 3, 0}, // ZExt           |
-    {  8, 0, 1,99,99, 0, 2,99,99,99, 0, 3, 0}, // SExt           |
-    {  0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToUI         |
-    {  0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToSI         |
-    { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // UIToFP        +- firstOp
-    { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // SIToFP         |
-    { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // FPTrunc        |
-    { 99,99,99, 2, 2,99,99, 8, 2,99,99, 4, 0}, // FPExt          |
-    {  1, 0, 0,99,99, 0, 0,99,99,99, 7, 3, 0}, // PtrToInt       |
-    { 99,99,99,99,99,99,99,99,99,11,99,15, 0}, // IntToPtr       |
-    {  5, 5, 5, 0, 0, 5, 5, 0, 0,16, 5, 1,14}, // BitCast        |
-    {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+
+    // T           F  F  U  S  F  F  P  P  I  B  A  -+
+    // R  Z  S  P  P  I  I  T  P  2  2  N  T  S   |
+    // U  E  E  2  2  2  2  R  E  I  A  T  C  C  +- secondOp
+    // N  X  X  U  S  F  F  N  X  N  D  2  V  V   |
+    // C  T  T  I  I  P  P  C  T  T  R  P  T  T  -+
+    {  1, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // Trunc         -+
+    {  8, 1, 9,99,99, 2,17,99,99,99,99, 2, 3, 0}, // ZExt           |
+    {  8, 0, 1,99,99, 0, 2,99,99,99,99, 0, 3, 0}, // SExt           |
+    {  0, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // FPToUI         |
+    {  0, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // FPToSI         |
+    { 99,99,99, 0, 0,99,99, 0, 0,99,99,99, 4, 0}, // UIToFP        +- firstOp
+    { 99,99,99, 0, 0,99,99, 0, 0,99,99,99, 4, 0}, // SIToFP         |
+    { 99,99,99, 0, 0,99,99, 0, 0,99,99,99, 4, 0}, // FPTrunc        |
+    { 99,99,99, 2, 2,99,99, 8, 2,99,99,99, 4, 0}, // FPExt          |
+    {  1, 0, 0,99,99, 0, 0,99,99,99,99, 7, 3, 0}, // PtrToInt       |
+    {  1, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // PtrToAddr      |
+    { 99,99,99,99,99,99,99,99,99,11,99,99,15, 0}, // IntToPtr       |
+    {  5, 5, 5, 0, 0, 5, 5, 0, 0,16,16, 5, 1,14}, // BitCast        |
+    {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+
   };
+  // clang-format on
 
   // TODO: This logic could be encoded into the table above and handled in the
   // switch below.
@@ -3046,6 +3050,7 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
   case SIToFP:    return new SIToFPInst    (S, Ty, Name, InsertBefore);
   case FPToUI:    return new FPToUIInst    (S, Ty, Name, InsertBefore);
   case FPToSI:    return new FPToSIInst    (S, Ty, Name, InsertBefore);
+  case PtrToAddr: return new PtrToAddrInst (S, Ty, Name, InsertBefore);
   case PtrToInt:  return new PtrToIntInst  (S, Ty, Name, InsertBefore);
   case IntToPtr:  return new IntToPtrInst  (S, Ty, Name, InsertBefore);
   case BitCast:
@@ -3347,6 +3352,7 @@ CastInst::castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy) {
   case Instruction::FPToSI:
     return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() &&
            SrcEC == DstEC;
+  case Instruction::PtrToAddr:
   case Instruction::PtrToInt:
     if (SrcEC != DstEC)
       return false;
@@ -3460,6 +3466,12 @@ PtrToIntInst::PtrToIntInst(Value *S, Type *Ty, const Twine &Name,
   assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
 }
 
+PtrToAddrInst::PtrToAddrInst(Value *S, Type *Ty, const Twine &Name,
+                             InsertPosition InsertBefore)
+    : CastInst(Ty, PtrToAddr, S, Name, InsertBefore) {
+  assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToAddr");
+}
+
 IntToPtrInst::IntToPtrInst(Value *S, Type *Ty, const Twine &Name,
                            InsertPosition InsertBefore)
     : CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
@@ -4427,6 +4439,10 @@ PtrToIntInst *PtrToIntInst::cloneImpl() const {
   return new PtrToIntInst(getOperand(0), getType());
 }
 
+PtrToAddrInst *PtrToAddrInst::cloneImpl() const {
+  return new PtrToAddrInst(getOperand(0), getType());
+}
+
 IntToPtrInst *IntToPtrInst::cloneImpl() const {
   return new IntToPtrInst(getOperand(0), getType());
 }
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index aa2a60e..e03f993 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -312,10 +312,8 @@ template <> struct MDNodeKeyImpl<MDTuple> : MDNodeOpsKey {
 template <> struct MDNodeKeyImpl<DILocation> {
   Metadata *Scope;
   Metadata *InlinedAt;
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
   uint64_t AtomGroup : 61;
   uint64_t AtomRank : 3;
-#endif
   unsigned Line;
   uint16_t Column;
   bool ImplicitCode;
@@ -323,36 +321,24 @@ template <> struct MDNodeKeyImpl<DILocation> {
   MDNodeKeyImpl(unsigned Line, uint16_t Column, Metadata *Scope,
                 Metadata *InlinedAt, bool ImplicitCode, uint64_t AtomGroup,
                 uint8_t AtomRank)
-      : Scope(Scope), InlinedAt(InlinedAt),
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
-        AtomGroup(AtomGroup), AtomRank(AtomRank),
-#endif
-        Line(Line), Column(Column), ImplicitCode(ImplicitCode) {
-  }
+      : Scope(Scope), InlinedAt(InlinedAt), AtomGroup(AtomGroup),
+        AtomRank(AtomRank), Line(Line), Column(Column),
+        ImplicitCode(ImplicitCode) {}
 
   MDNodeKeyImpl(const DILocation *L)
       : Scope(L->getRawScope()), InlinedAt(L->getRawInlinedAt()),
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
         AtomGroup(L->getAtomGroup()), AtomRank(L->getAtomRank()),
-#endif
         Line(L->getLine()), Column(L->getColumn()),
-        ImplicitCode(L->isImplicitCode()) {
-  }
+        ImplicitCode(L->isImplicitCode()) {}
 
   bool isKeyOf(const DILocation *RHS) const {
     return Line == RHS->getLine() && Column == RHS->getColumn() &&
            Scope == RHS->getRawScope() && InlinedAt == RHS->getRawInlinedAt() &&
-           ImplicitCode == RHS->isImplicitCode()
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
-           && AtomGroup == RHS->getAtomGroup() &&
-           AtomRank == RHS->getAtomRank();
-#else
-        ;
-#endif
+           ImplicitCode == RHS->isImplicitCode() &&
+           AtomGroup == RHS->getAtomGroup() && AtomRank == RHS->getAtomRank();
   }
 
   unsigned getHashValue() const {
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
     // Hashing AtomGroup and AtomRank substantially impacts performance whether
     // Key Instructions is enabled or not. We can't detect whether it's enabled
     // here cheaply; avoiding hashing zero values is a good approximation. This
@@ -363,7 +349,6 @@ template <> struct MDNodeKeyImpl<DILocation> {
     if (AtomGroup || AtomRank)
       return hash_combine(Line, Column, Scope, InlinedAt, ImplicitCode,
                           AtomGroup, (uint8_t)AtomRank);
-#endif
     return hash_combine(Line, Column, Scope, InlinedAt, ImplicitCode);
   }
 };
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index 129ca4a..5928c89 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -747,34 +747,28 @@ const Value *Value::stripAndAccumulateConstantOffsets(
       // means when we construct GEPOffset, we need to use the size
       // of GEP's pointer type rather than the size of the original
      // pointer type.
-      unsigned CurBitWidth = DL.getIndexTypeSizeInBits(V->getType());
-      if (CurBitWidth == BitWidth) {
-        if (!GEP->accumulateConstantOffset(DL, Offset, ExternalAnalysis))
-          return V;
-      } else {
-        APInt GEPOffset(CurBitWidth, 0);
-        if (!GEP->accumulateConstantOffset(DL, GEPOffset, ExternalAnalysis))
-          return V;
+      APInt GEPOffset(DL.getIndexTypeSizeInBits(V->getType()), 0);
+      if (!GEP->accumulateConstantOffset(DL, GEPOffset, ExternalAnalysis))
+        return V;
 
-        // Stop traversal if the pointer offset wouldn't fit in the bit-width
-        // provided by the Offset argument. This can happen due to AddrSpaceCast
-        // stripping.
-        if (GEPOffset.getSignificantBits() > BitWidth)
-          return V;
+      // Stop traversal if the pointer offset wouldn't fit in the bit-width
+      // provided by the Offset argument. This can happen due to AddrSpaceCast
+      // stripping.
+      if (GEPOffset.getSignificantBits() > BitWidth)
+        return V;
 
-        // External Analysis can return a result higher/lower than the value
-        // represents. We need to detect overflow/underflow.
-        APInt GEPOffsetST = GEPOffset.sextOrTrunc(BitWidth);
-        if (!ExternalAnalysis) {
-          Offset += GEPOffsetST;
-        } else {
-          bool Overflow = false;
-          APInt OldOffset = Offset;
-          Offset = Offset.sadd_ov(GEPOffsetST, Overflow);
-          if (Overflow) {
-            Offset = OldOffset;
-            return V;
-          }
+      // External Analysis can return a result higher/lower than the value
+      // represents. We need to detect overflow/underflow.
+      APInt GEPOffsetST = GEPOffset.sextOrTrunc(BitWidth);
+      if (!ExternalAnalysis) {
+        Offset += GEPOffsetST;
+      } else {
+        bool Overflow = false;
+        APInt OldOffset = Offset;
+        Offset = Offset.sadd_ov(GEPOffsetST, Overflow);
+        if (Overflow) {
+          Offset = OldOffset;
+          return V;
         }
       }
       V = GEP->getPointerOperand();
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index f5dcb5e..1d3c379 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -566,6 +566,8 @@ private:
   void visitUIToFPInst(UIToFPInst &I);
   void visitSIToFPInst(SIToFPInst &I);
   void visitIntToPtrInst(IntToPtrInst &I);
+  void checkPtrToAddr(Type *SrcTy, Type *DestTy, const Value &V);
+  void visitPtrToAddrInst(PtrToAddrInst &I);
   void visitPtrToIntInst(PtrToIntInst &I);
   void visitBitCastInst(BitCastInst &I);
   void visitAddrSpaceCastInst(AddrSpaceCastInst &I);
@@ -834,6 +836,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
           &GV);
     Check(GV.getInitializer()->getType()->isSized(),
           "Global variable initializer must be sized", &GV);
+    visitConstantExprsRecursively(GV.getInitializer());
     // If the global has common linkage, it must have a zero initializer and
     // cannot be constant.
     if (GV.hasCommonLinkage()) {
@@ -2610,6 +2613,8 @@ void Verifier::visitConstantExpr(const ConstantExpr *CE) {
     Check(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0),
                                 CE->getType()),
           "Invalid bitcast", CE);
+  else if (CE->getOpcode() == Instruction::PtrToAddr)
+    checkPtrToAddr(CE->getOperand(0)->getType(), CE->getType(), *CE);
 }
 
 void Verifier::visitConstantPtrAuth(const ConstantPtrAuth *CPA) {
@@ -3532,6 +3537,28 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) {
   visitInstruction(I);
 }
 
+void Verifier::checkPtrToAddr(Type *SrcTy, Type *DestTy, const Value &V) {
+  Check(SrcTy->isPtrOrPtrVectorTy(), "PtrToAddr source must be pointer", V);
+  Check(DestTy->isIntOrIntVectorTy(), "PtrToAddr result must be integral", V);
+  Check(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToAddr type mismatch",
+        V);
+
+  if (SrcTy->isVectorTy()) {
+    auto *VSrc = cast<VectorType>(SrcTy);
+    auto *VDest = cast<VectorType>(DestTy);
+    Check(VSrc->getElementCount() == VDest->getElementCount(),
+          "PtrToAddr vector length mismatch", V);
+  }
+
+  Type *AddrTy = DL.getAddressType(SrcTy);
+  Check(AddrTy == DestTy, "PtrToAddr result must be address width", V);
+}
+
+void Verifier::visitPtrToAddrInst(PtrToAddrInst &I) {
+  checkPtrToAddr(I.getOperand(0)->getType(), I.getType(), I);
+  visitInstruction(I);
+}
+
 void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
   // Get the source and destination types
   Type *SrcTy = I.getOperand(0)->getType();
@@ -3547,7 +3574,7 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
     auto *VSrc = cast<VectorType>(SrcTy);
     auto *VDest = cast<VectorType>(DestTy);
     Check(VSrc->getElementCount() == VDest->getElementCount(),
-          "PtrToInt Vector width mismatch", &I);
+          "PtrToInt Vector length mismatch", &I);
   }
 
   visitInstruction(I);
@@ -3567,7 +3594,7 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
     auto *VSrc = cast<VectorType>(SrcTy);
     auto *VDest = cast<VectorType>(DestTy);
     Check(VSrc->getElementCount() == VDest->getElementCount(),
-          "IntToPtr Vector width mismatch", &I);
+          "IntToPtr Vector length mismatch", &I);
   }
   visitInstruction(I);
 }
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 7ca26aa..df807fc 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -331,61 +331,34 @@ void InstrProfWriter::addDataAccessProfData(
   DataAccessProfileData = std::move(DataAccessProfDataIn);
 }
 
-void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
-  assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
-  assert(!Trace.FunctionNameRefs.empty());
-  if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {
-    // Simply append the trace if we have not yet hit our reservoir size limit.
-    TemporalProfTraces.push_back(std::move(Trace));
-  } else {
-    // Otherwise, replace a random trace in the stream.
-    std::uniform_int_distribution<uint64_t> Distribution(
-        0, TemporalProfTraceStreamSize);
-    uint64_t RandomIndex = Distribution(RNG);
-    if (RandomIndex < TemporalProfTraces.size())
-      TemporalProfTraces[RandomIndex] = std::move(Trace);
-  }
-  ++TemporalProfTraceStreamSize;
-}
-
 void InstrProfWriter::addTemporalProfileTraces(
     SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
+  if (TemporalProfTraces.size() > TemporalProfTraceReservoirSize)
+    TemporalProfTraces.truncate(TemporalProfTraceReservoirSize);
   for (auto &Trace : SrcTraces)
     if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
       Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
   llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });
-  // Assume that the source has the same reservoir size as the destination to
-  // avoid needing to record it in the indexed profile format.
-  bool IsDestSampled =
-      (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);
-  bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);
-  if (!IsDestSampled && IsSrcSampled) {
-    // If one of the traces are sampled, ensure that it belongs to Dest.
-    std::swap(TemporalProfTraces, SrcTraces);
-    std::swap(TemporalProfTraceStreamSize, SrcStreamSize);
-    std::swap(IsDestSampled, IsSrcSampled);
-  }
-  if (!IsSrcSampled) {
-    // If the source stream is not sampled, we add each source trace normally.
-    for (auto &Trace : SrcTraces)
-      addTemporalProfileTrace(std::move(Trace));
+  // If there are no source traces, it is probably because
+  // --temporal-profile-max-trace-length=0 was set to deliberately remove all
+  // traces. In that case, we do not want to increase the stream size
+  if (SrcTraces.empty())
     return;
-  }
-  // Otherwise, we find the traces that would have been removed if we added
-  // the whole source stream.
-  SmallSetVector<uint64_t, 8> IndicesToReplace;
-  for (uint64_t I = 0; I < SrcStreamSize; I++) {
-    std::uniform_int_distribution<uint64_t> Distribution(
-        0, TemporalProfTraceStreamSize);
+  // Add traces until our reservoir is full or we run out of source traces
+  auto SrcTraceIt = SrcTraces.begin();
+  while (TemporalProfTraces.size() < TemporalProfTraceReservoirSize &&
+         SrcTraceIt < SrcTraces.end())
+    TemporalProfTraces.push_back(*SrcTraceIt++);
+  // Our reservoir is full, we need to sample the source stream
+  llvm::shuffle(SrcTraceIt, SrcTraces.end(), RNG);
+  for (uint64_t I = TemporalProfTraces.size();
+       I < SrcStreamSize && SrcTraceIt < SrcTraces.end(); I++) {
+    std::uniform_int_distribution<uint64_t> Distribution(0, I);
     uint64_t RandomIndex = Distribution(RNG);
     if (RandomIndex < TemporalProfTraces.size())
-      IndicesToReplace.insert(RandomIndex);
-    ++TemporalProfTraceStreamSize;
+      TemporalProfTraces[RandomIndex] = *SrcTraceIt++;
   }
-  // Then we insert a random sample of the source traces.
-  llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);
-  for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))
-    TemporalProfTraces[Index] = std::move(Trace);
+  TemporalProfTraceStreamSize += SrcStreamSize;
 }
 
 void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp
index fe34037..70ac68a 100644
--- a/llvm/lib/SandboxIR/Context.cpp
+++ b/llvm/lib/SandboxIR/Context.cpp
@@ -256,6 +256,7 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) {
     case llvm::Instruction::FPToUI:
     case llvm::Instruction::FPToSI:
     case llvm::Instruction::FPExt:
+    case llvm::Instruction::PtrToAddr:
     case llvm::Instruction::PtrToInt:
     case llvm::Instruction::IntToPtr:
     case llvm::Instruction::SIToFP:
diff --git a/llvm/lib/SandboxIR/Instruction.cpp b/llvm/lib/SandboxIR/Instruction.cpp
index 956047c..1a81d18 100644
--- a/llvm/lib/SandboxIR/Instruction.cpp
+++ b/llvm/lib/SandboxIR/Instruction.cpp
@@ -1007,6 +1007,9 @@ static llvm::Instruction::CastOps getLLVMCastOp(Instruction::Opcode Opc) {
     return static_cast<llvm::Instruction::CastOps>(llvm::Instruction::FPToSI);
   case Instruction::Opcode::FPExt:
     return static_cast<llvm::Instruction::CastOps>(llvm::Instruction::FPExt);
+  case Instruction::Opcode::PtrToAddr:
+    return static_cast<llvm::Instruction::CastOps>(
+        llvm::Instruction::PtrToAddr);
   case Instruction::Opcode::PtrToInt:
     return static_cast<llvm::Instruction::CastOps>(llvm::Instruction::PtrToInt);
   case Instruction::Opcode::IntToPtr:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e4aa8b8..e63b937 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1844,6 +1844,17 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                                /*IsStore*/ true,
                                /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
+  case Intrinsic::riscv_sseg2_store_mask:
+  case Intrinsic::riscv_sseg3_store_mask:
+  case Intrinsic::riscv_sseg4_store_mask:
+  case Intrinsic::riscv_sseg5_store_mask:
+  case Intrinsic::riscv_sseg6_store_mask:
+  case Intrinsic::riscv_sseg7_store_mask:
+  case Intrinsic::riscv_sseg8_store_mask:
+    // Operands are (vec, ..., vec, ptr, offset, mask, vl)
+    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
+                               /*IsStore*/ true,
+                               /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
   case Intrinsic::riscv_vlm:
     return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
                                /*IsStore*/ false,
@@ -11084,69 +11095,118 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
 }
 
-SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
-                                                 SelectionDAG &DAG) const {
-  unsigned IntNo = Op.getConstantOperandVal(1);
+static SDValue
+lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
+                                   const RISCVSubtarget &Subtarget,
+                                   SelectionDAG &DAG) {
+  bool IsStrided;
   switch (IntNo) {
-  default:
-    break;
   case Intrinsic::riscv_seg2_store_mask:
   case Intrinsic::riscv_seg3_store_mask:
   case Intrinsic::riscv_seg4_store_mask:
   case Intrinsic::riscv_seg5_store_mask:
   case Intrinsic::riscv_seg6_store_mask:
   case Intrinsic::riscv_seg7_store_mask:
-  case Intrinsic::riscv_seg8_store_mask: {
-    SDLoc DL(Op);
-    static const Intrinsic::ID VssegInts[] = {
-        Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
-        Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
-        Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
-        Intrinsic::riscv_vsseg8_mask};
+  case Intrinsic::riscv_seg8_store_mask:
+    IsStrided = false;
+    break;
+  case Intrinsic::riscv_sseg2_store_mask:
+  case Intrinsic::riscv_sseg3_store_mask:
+  case Intrinsic::riscv_sseg4_store_mask:
+  case Intrinsic::riscv_sseg5_store_mask:
+  case Intrinsic::riscv_sseg6_store_mask:
+  case Intrinsic::riscv_sseg7_store_mask:
+  case Intrinsic::riscv_sseg8_store_mask:
+    IsStrided = true;
+    break;
+  default:
+    llvm_unreachable("unexpected intrinsic ID");
+  }
 
-    // Operands: (chain, int_id, vec*, ptr, mask, vl)
-    unsigned NF = Op->getNumOperands() - 5;
-    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
-    MVT XLenVT = Subtarget.getXLenVT();
-    MVT VT = Op->getOperand(2).getSimpleValueType();
-    MVT ContainerVT = getContainerForFixedLengthVector(VT);
-    unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
-                  ContainerVT.getScalarSizeInBits();
-    EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
+  SDLoc DL(Op);
+  static const Intrinsic::ID VssegInts[] = {
+      Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
+      Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
+      Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
+      Intrinsic::riscv_vsseg8_mask};
+  static const Intrinsic::ID VsssegInts[] = {
+      Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
+      Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
+      Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
+      Intrinsic::riscv_vssseg8_mask};
+
+  // Operands: (chain, int_id, vec*, ptr, mask, vl) or
+  // (chain, int_id, vec*, ptr, stride, mask, vl)
+  unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
+  assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
+  MVT XLenVT = Subtarget.getXLenVT();
+  MVT VT = Op->getOperand(2).getSimpleValueType();
+  MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
+  unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
+                ContainerVT.getScalarSizeInBits();
+  EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
 
-    SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
-    SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
-    MVT MaskVT = Mask.getSimpleValueType();
-    MVT MaskContainerVT =
-        ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
-    Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
+  SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
+  MVT MaskVT = Mask.getSimpleValueType();
+  MVT MaskContainerVT =
+      ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
+  Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
 
-    SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
-    SDValue Ptr = Op->getOperand(NF + 2);
+  SDValue IntID = DAG.getTargetConstant(
+      IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
+  SDValue Ptr = Op->getOperand(NF + 2);
 
-    auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
+  auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
 
-    SDValue StoredVal = DAG.getUNDEF(VecTupTy);
-    for (unsigned i = 0; i < NF; i++)
-      StoredVal = DAG.getNode(
-          RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
-          convertToScalableVector(
-              ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
-          DAG.getTargetConstant(i, DL, MVT::i32));
+  SDValue StoredVal = DAG.getUNDEF(VecTupTy);
+  for (unsigned i = 0; i < NF; i++)
+    StoredVal = DAG.getNode(
+        RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
+        convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
+                                DAG, Subtarget),
+        DAG.getTargetConstant(i, DL, MVT::i32));
+
+  SmallVector<SDValue, 10> Ops = {
+      FixedIntrinsic->getChain(),
+      IntID,
+      StoredVal,
+      Ptr,
+      Mask,
+      VL,
+      DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
+  // Insert the stride operand.
+  if (IsStrided)
+    Ops.insert(std::next(Ops.begin(), 4),
+               Op.getOperand(Op.getNumOperands() - 3));
+
+  return DAG.getMemIntrinsicNode(
+      ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
+      FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
+}
+
+SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  unsigned IntNo = Op.getConstantOperandVal(1);
+  switch (IntNo) {
+  default:
+    break;
+  case Intrinsic::riscv_seg2_store_mask:
+  case Intrinsic::riscv_seg3_store_mask:
+  case Intrinsic::riscv_seg4_store_mask:
+  case Intrinsic::riscv_seg5_store_mask:
+  case Intrinsic::riscv_seg6_store_mask:
+  case Intrinsic::riscv_seg7_store_mask:
+  case Intrinsic::riscv_seg8_store_mask:
+  case Intrinsic::riscv_sseg2_store_mask:
+  case Intrinsic::riscv_sseg3_store_mask:
+  case Intrinsic::riscv_sseg4_store_mask:
+  case Intrinsic::riscv_sseg5_store_mask:
+  case Intrinsic::riscv_sseg6_store_mask:
+  case Intrinsic::riscv_sseg7_store_mask:
+  case Intrinsic::riscv_sseg8_store_mask:
+    return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
 
-    SDValue Ops[] = {
-        FixedIntrinsic->getChain(),
-        IntID,
-        StoredVal,
-        Ptr,
-        Mask,
-        VL,
-        DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
-
-    return DAG.getMemIntrinsicNode(
-        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
-        FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
-  }
   case Intrinsic::riscv_sf_vc_xv_se:
     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
   case Intrinsic::riscv_sf_vc_iv_se:
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
index 5541506..24ebbc3 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
@@ -524,16 +524,33 @@ foreach mx = SchedMxListW in {
 foreach mx = SchedMxList in {
   defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
 
-  defm "" : LMULWriteResMX<"WriteVSALUV", [SMX60_VIEU], mx, IsWorstCase>;
-  defm "" : LMULWriteResMX<"WriteVSALUX", [SMX60_VIEU], mx, IsWorstCase>;
-  defm "" : LMULWriteResMX<"WriteVSALUI", [SMX60_VIEU], mx, IsWorstCase>;
-  defm "" : LMULWriteResMX<"WriteVAALUV", [SMX60_VIEU], mx, IsWorstCase>;
-  defm "" : LMULWriteResMX<"WriteVAALUX", [SMX60_VIEU], mx, IsWorstCase>;
-  defm "" : LMULWriteResMX<"WriteVSMulV", [SMX60_VIEU], mx, IsWorstCase>;
-  defm "" : LMULWriteResMX<"WriteVSMulX", [SMX60_VIEU], mx, IsWorstCase>;
-  defm "" : LMULWriteResMX<"WriteVSShiftV", [SMX60_VIEU], mx, IsWorstCase>;
-  defm "" : LMULWriteResMX<"WriteVSShiftX", [SMX60_VIEU], mx, IsWorstCase>;
-  defm "" : LMULWriteResMX<"WriteVSShiftI", [SMX60_VIEU], mx, IsWorstCase>;
+  let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [ConstOneUntilM1ThenDouble<mx>.c] in {
+    defm "" : LMULWriteResMX<"WriteVSALUV", [SMX60_VIEU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSALUX", [SMX60_VIEU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSALUI", [SMX60_VIEU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVAALUV", [SMX60_VIEU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVAALUX", [SMX60_VIEU], mx, IsWorstCase>;
+  }
+
+  // Latency of vsmul: e8/e16 = 4/4/5/8, e32 = 5/5/5/8, e64 = 7/8/16/32
+  // We use the worst-case until we can split the SEW.
+  defvar VSMulLat = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c;
+  // Latency of vsmul: e8/e16/e32 = 1/2/4/8, e64 = 4/8/16/32
+  // We use the worst-case until we can split the SEW.
+  defvar VSMulOcc = ConstValueUntilLMULThenDoubleBase<"M1", 1, 4, mx>.c;
+  // TODO: change WriteVSMulV/X to be defined with LMULSEWSchedWrites
+  let Latency = VSMulLat, ReleaseAtCycles = [VSMulOcc] in {
+    defm "" : LMULWriteResMX<"WriteVSMulV", [SMX60_VIEU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSMulX", [SMX60_VIEU], mx, IsWorstCase>;
+  }
+
+  defvar VSShiftLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+  defvar VSShiftOcc = ConstOneUntilMF2ThenDouble<mx>.c;
+  let Latency = VSShiftLat, ReleaseAtCycles = [VSShiftOcc] in {
+    defm "" : LMULWriteResMX<"WriteVSShiftV", [SMX60_VIEU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSShiftX", [SMX60_VIEU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSShiftI", [SMX60_VIEU], mx, IsWorstCase>;
+  }
 }
 
 // 13. Vector Floating-Point Instructions
diff --git a/llvm/lib/Transforms/Scalar/InferAlignment.cpp b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
index 0ddc231..e9bf59c 100644
--- a/llvm/lib/Transforms/Scalar/InferAlignment.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
@@ -58,14 +58,55 @@ bool inferAlignment(Function &F, AssumptionCache &AC, DominatorTree &DT) {
   }
 
   // Compute alignment from known bits.
+  auto InferFromKnownBits = [&](Instruction &I, Value *PtrOp) {
+    KnownBits Known = computeKnownBits(PtrOp, DL, &AC, &I, &DT);
+    unsigned TrailZ =
+        std::min(Known.countMinTrailingZeros(), +Value::MaxAlignmentExponent);
+    return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
+  };
+
+  // Propagate alignment between loads and stores that originate from the
+  // same base pointer.
+  DenseMap<Value *, Align> BestBasePointerAligns;
+  auto InferFromBasePointer = [&](Value *PtrOp, Align LoadStoreAlign) {
+    APInt OffsetFromBase(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
+    PtrOp = PtrOp->stripAndAccumulateConstantOffsets(DL, OffsetFromBase, true);
+    // Derive the base pointer alignment from the load/store alignment
+    // and the offset from the base pointer.
+    Align BasePointerAlign =
+        commonAlignment(LoadStoreAlign, OffsetFromBase.getLimitedValue());
+
+    auto [It, Inserted] =
+        BestBasePointerAligns.try_emplace(PtrOp, BasePointerAlign);
+    if (!Inserted) {
+      // If the stored base pointer alignment is better than the
+      // base pointer alignment we derived, we may be able to use it
+      // to improve the load/store alignment. If not, store the
+      // improved base pointer alignment for future iterations.
+      if (It->second > BasePointerAlign) {
+        Align BetterLoadStoreAlign =
+            commonAlignment(It->second, OffsetFromBase.getLimitedValue());
+        return BetterLoadStoreAlign;
+      }
+      It->second = BasePointerAlign;
+    }
+    return LoadStoreAlign;
+  };
+
   for (BasicBlock &BB : F) {
+    // We need to reset the map for each block because alignment information
+    // can only be propagated from instruction A to B if A dominates B.
+    // This is because control flow (and exception throwing) could be dependent
+    // on the address (and its alignment) at runtime. Some sort of dominator
+    // tree approach could be better, but doing a simple forward pass through a
+    // single basic block is correct too.
+    BestBasePointerAligns.clear();
+
     for (Instruction &I : BB) {
       Changed |= tryToImproveAlign(
           DL, &I, [&](Value *PtrOp, Align OldAlign, Align PrefAlign) {
-            KnownBits Known = computeKnownBits(PtrOp, DL, &AC, &I, &DT);
-            unsigned TrailZ = std::min(Known.countMinTrailingZeros(),
-                                       +Value::MaxAlignmentExponent);
-            return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
+            return std::max(InferFromKnownBits(I, PtrOp),
+                            InferFromBasePointer(PtrOp, OldAlign));
           });
     }
   }
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index fcdb8a9..c68149b 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -263,6 +263,7 @@ static bool isUniformShape(Value *V) {
   case llvm::Instruction::FPExt:
     return true;
   case llvm::Instruction::AddrSpaceCast:
+  case CastInst::PtrToAddr:
   case CastInst::PtrToInt:
   case CastInst::IntToPtr:
     return false;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 39011e7..ec06a21 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12050,7 +12050,8 @@ static InstructionCost canConvertToFMA(ArrayRef<Value *> VL,
     for (auto [V, Op] : zip(VL, Operands.front())) {
       auto *I = dyn_cast<Instruction>(Op);
       if (!I || !I->hasOneUse()) {
-        FMACost += TTI.getInstructionCost(cast<Instruction>(V), CostKind);
+        if (auto *OpI = dyn_cast<Instruction>(V))
+          FMACost += TTI.getInstructionCost(OpI, CostKind);
         if (I)
           FMACost += TTI.getInstructionCost(I, CostKind);
         continue;
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
index f32d57f..e414c12 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
@@ -81,6 +81,7 @@ LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes(
   case Instruction::Opcode::FPToUI:
   case Instruction::Opcode::FPToSI:
   case Instruction::Opcode::FPExt:
+  case Instruction::Opcode::PtrToAddr:
   case Instruction::Opcode::PtrToInt:
   case Instruction::Opcode::IntToPtr:
   case Instruction::Opcode::SIToFP:
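
A note on what Verifier::checkPtrToAddr enforces, distilled: unlike ptrtoint, which accepts any integer width (SelectionDAG truncates or extends as needed), ptrtoaddr's result type must be exactly the address type of the source pointer's address space. A hedged restatement of that predicate (illustrative only; it relies on the same DataLayout::getAddressType call the Verifier uses above):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"

    static bool isValidPtrToAddrCast(const llvm::DataLayout &DL,
                                     llvm::Type *SrcTy, llvm::Type *DstTy) {
      using namespace llvm;
      if (!SrcTy->isPtrOrPtrVectorTy() || !DstTy->isIntOrIntVectorTy() ||
          SrcTy->isVectorTy() != DstTy->isVectorTy())
        return false;
      if (SrcTy->isVectorTy() &&
          cast<VectorType>(SrcTy)->getElementCount() !=
              cast<VectorType>(DstTy)->getElementCount())
        return false;
      // The result must be exactly the address width, not any integer type.
      return DL.getAddressType(SrcTy) == DstTy;
    }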
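
The InstrProfWriter rewrite replaces per-trace reservoir sampling with a two-phase merge: fill the destination reservoir from the source, then treat the remaining source traces as a sampled stream that replaces random reservoir slots, and finally grow the recorded stream size. A standalone approximation of that strategy (generic names, not the LLVM code itself):

    #include <algorithm>
    #include <cstdint>
    #include <random>
    #include <vector>

    template <typename T>
    void mergeReservoir(std::vector<T> &Dest, std::vector<T> Src,
                        uint64_t &DestStreamSize, uint64_t SrcStreamSize,
                        size_t ReservoirSize, std::mt19937_64 &RNG) {
      if (Src.empty())
        return; // Nothing to add; leave the stream size untouched.
      auto It = Src.begin();
      // Phase 1: append until the reservoir is full.
      while (Dest.size() < ReservoirSize && It != Src.end())
        Dest.push_back(std::move(*It++));
      // Phase 2: sample the rest of the source stream into random slots,
      // giving later elements a proportionally smaller chance of inclusion.
      std::shuffle(It, Src.end(), RNG);
      for (uint64_t I = Dest.size(); I < SrcStreamSize && It != Src.end(); ++I) {
        std::uniform_int_distribution<uint64_t> Dist(0, I);
        uint64_t Idx = Dist(RNG);
        if (Idx < Dest.size())
          Dest[Idx] = std::move(*It++);
      }
      DestStreamSize += SrcStreamSize;
    }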
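
The InferAlignment change rests on simple alignment arithmetic: if an access at base+Offset is known A-aligned, the base is commonAlignment(A, Offset)-aligned, and that base alignment can be pushed back down to other accesses at different offsets within the same basic block. A small standalone illustration of the arithmetic (assumed values; a simplification of LLVM's commonAlignment, not the library function):

    #include <algorithm>
    #include <cstdint>

    // Greatest power of two dividing both the alignment and the offset.
    static uint64_t commonAlign(uint64_t A, uint64_t Offset) {
      return std::min(A, Offset ? (Offset & (~Offset + 1)) : A);
    }

    int main() {
      // A 16-byte-aligned store at base+8 proves base is 8-aligned...
      uint64_t BaseAlign = commonAlign(16, 8); // 8
      // ...which upgrades a load at base+24 to 8-byte alignment as well.
      uint64_t LoadAlign = commonAlign(BaseAlign, 24); // 8
      return (BaseAlign == 8 && LoadAlign == 8) ? 0 : 1;
    }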