aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Analysis/ConstantFolding.cpp3
-rw-r--r--llvm/lib/Analysis/ValueTracking.cpp3
-rw-r--r--llvm/lib/AsmParser/LLLexer.cpp1
-rw-r--r--llvm/lib/AsmParser/LLParser.cpp2
-rw-r--r--llvm/lib/Bitcode/Reader/BitcodeReader.cpp1
-rw-r--r--llvm/lib/Bitcode/Writer/BitcodeWriter.cpp1
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp1
-rw-r--r--llvm/lib/CodeGen/RegisterPressure.cpp4
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp5
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h1
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp1
-rw-r--r--llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp2
-rw-r--r--llvm/lib/IR/ConstantFold.cpp1
-rw-r--r--llvm/lib/IR/ConstantRange.cpp1
-rw-r--r--llvm/lib/IR/Constants.cpp20
-rw-r--r--llvm/lib/IR/DebugInfoMetadata.cpp10
-rw-r--r--llvm/lib/IR/Globals.cpp1
-rw-r--r--llvm/lib/IR/Instruction.cpp1
-rw-r--r--llvm/lib/IR/Instructions.cpp52
-rw-r--r--llvm/lib/IR/LLVMContextImpl.h27
-rw-r--r--llvm/lib/IR/Value.cpp46
-rw-r--r--llvm/lib/IR/Verifier.cpp31
-rw-r--r--llvm/lib/ProfileData/InstrProfWriter.cpp63
-rw-r--r--llvm/lib/SandboxIR/Context.cpp1
-rw-r--r--llvm/lib/SandboxIR/Instruction.cpp3
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp160
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td37
-rw-r--r--llvm/lib/Transforms/Scalar/InferAlignment.cpp49
-rw-r--r--llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp1
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp3
-rw-r--r--llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp1
31 files changed, 344 insertions, 189 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index dd98b62..c14cb9e 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1485,6 +1485,9 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
switch (Opcode) {
default:
llvm_unreachable("Missing case");
+ case Instruction::PtrToAddr:
+ // TODO: Add some of the ptrtoint folds here as well.
+ break;
case Instruction::PtrToInt:
if (auto *CE = dyn_cast<ConstantExpr>(C)) {
Constant *FoldedValue = nullptr;
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 1e70228..b0e4b00 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -9147,7 +9147,8 @@ static bool matchTwoInputRecurrence(const PHINode *PN, InstTy *&Inst,
return false;
for (unsigned I = 0; I != 2; ++I) {
- if (auto *Operation = dyn_cast<InstTy>(PN->getIncomingValue(I))) {
+ if (auto *Operation = dyn_cast<InstTy>(PN->getIncomingValue(I));
+ Operation && Operation->getNumOperands() >= 2) {
Value *LHS = Operation->getOperand(0);
Value *RHS = Operation->getOperand(1);
if (LHS != PN && RHS != PN)
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 520c6a0..3d5bd61 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -928,6 +928,7 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(fptoui, FPToUI);
INSTKEYWORD(fptosi, FPToSI);
INSTKEYWORD(inttoptr, IntToPtr);
+ INSTKEYWORD(ptrtoaddr, PtrToAddr);
INSTKEYWORD(ptrtoint, PtrToInt);
INSTKEYWORD(bitcast, BitCast);
INSTKEYWORD(addrspacecast, AddrSpaceCast);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 13bef1f..1bc2906 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -4273,6 +4273,7 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
case lltok::kw_bitcast:
case lltok::kw_addrspacecast:
case lltok::kw_inttoptr:
+ case lltok::kw_ptrtoaddr:
case lltok::kw_ptrtoint: {
unsigned Opc = Lex.getUIntVal();
Type *DestTy = nullptr;
@@ -7310,6 +7311,7 @@ int LLParser::parseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_fptoui:
case lltok::kw_fptosi:
case lltok::kw_inttoptr:
+ case lltok::kw_ptrtoaddr:
case lltok::kw_ptrtoint:
return parseCast(Inst, PFS, KeywordVal);
case lltok::kw_fptrunc:
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 290d873..22a0d0f 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1283,6 +1283,7 @@ static int getDecodedCastOpcode(unsigned Val) {
case bitc::CAST_SITOFP : return Instruction::SIToFP;
case bitc::CAST_FPTRUNC : return Instruction::FPTrunc;
case bitc::CAST_FPEXT : return Instruction::FPExt;
+ case bitc::CAST_PTRTOADDR: return Instruction::PtrToAddr;
case bitc::CAST_PTRTOINT: return Instruction::PtrToInt;
case bitc::CAST_INTTOPTR: return Instruction::IntToPtr;
case bitc::CAST_BITCAST : return Instruction::BitCast;
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 05680fa..a3f8254 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -647,6 +647,7 @@ static unsigned getEncodedCastOpcode(unsigned Opcode) {
case Instruction::SIToFP : return bitc::CAST_SITOFP;
case Instruction::FPTrunc : return bitc::CAST_FPTRUNC;
case Instruction::FPExt : return bitc::CAST_FPEXT;
+ case Instruction::PtrToAddr: return bitc::CAST_PTRTOADDR;
case Instruction::PtrToInt: return bitc::CAST_PTRTOINT;
case Instruction::IntToPtr: return bitc::CAST_INTTOPTR;
case Instruction::BitCast : return bitc::CAST_BITCAST;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index c72b6e8..23a3543 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -3657,6 +3657,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV,
break; // Error
}
+ case Instruction::PtrToAddr:
case Instruction::PtrToInt: {
const DataLayout &DL = getDataLayout();
diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index ca51b67..5f37890 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -1001,7 +1001,7 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
++CritIdx;
if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == i) {
- int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].getUnitInc();
+ int PDiff = (int)PNew - CriticalPSets[CritIdx].getUnitInc();
if (PDiff > 0) {
Delta.CriticalMax = PressureChange(i);
Delta.CriticalMax.setUnitInc(PDiff);
@@ -1191,7 +1191,7 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
++CritIdx;
if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == PSetID) {
- int CritInc = (int)MNew - (int)CriticalPSets[CritIdx].getUnitInc();
+ int CritInc = (int)MNew - CriticalPSets[CritIdx].getUnitInc();
if (CritInc > 0 && CritInc <= std::numeric_limits<int16_t>::max()) {
Delta.CriticalMax = PressureChange(PSetID);
Delta.CriticalMax.setUnitInc(CritInc);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0d1e954..48ab797 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3977,6 +3977,11 @@ void SelectionDAGBuilder::visitSIToFP(const User &I) {
setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
}
+void SelectionDAGBuilder::visitPtrToAddr(const User &I) {
+ // FIXME: This is not correct for pointers whose address width differs from the pointer width.
+ visitPtrToInt(I);
+}
+
void SelectionDAGBuilder::visitPtrToInt(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index c251755..e0835e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -574,6 +574,7 @@ private:
void visitFPToSI(const User &I);
void visitUIToFP(const User &I);
void visitSIToFP(const User &I);
+ void visitPtrToAddr(const User &I);
void visitPtrToInt(const User &I);
void visitIntToPtr(const User &I);
void visitBitCast(const User &I);
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index bf4c9f9..d80a229 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1893,6 +1893,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
case SIToFP: return ISD::SINT_TO_FP;
case FPTrunc: return ISD::FP_ROUND;
case FPExt: return ISD::FP_EXTEND;
+ case PtrToAddr: return ISD::BITCAST;
case PtrToInt: return ISD::BITCAST;
case IntToPtr: return ISD::BITCAST;
case BitCast: return ISD::BITCAST;
diff --git a/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp b/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
index 9d84aa8..72308a3d 100644
--- a/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
+++ b/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
@@ -29,7 +29,7 @@ bool verifyRegisterValue(uint32_t RegisterValue) {
// This Range is reserved, therefore invalid, according to the spec
// https://github.com/llvm/wg-hlsl/blob/main/proposals/0002-root-signature-in-clang.md#all-the-values-should-be-legal
bool verifyRegisterSpace(uint32_t RegisterSpace) {
- return !(RegisterSpace >= 0xFFFFFFF0 && RegisterSpace <= 0xFFFFFFFF);
+ return !(RegisterSpace >= 0xFFFFFFF0);
}
bool verifyRootDescriptorFlag(uint32_t Version, uint32_t FlagsVal) {
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index d4ad21e..6b202ba 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -254,6 +254,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
return FoldBitCast(V, DestTy);
case Instruction::AddrSpaceCast:
case Instruction::IntToPtr:
+ case Instruction::PtrToAddr:
case Instruction::PtrToInt:
return nullptr;
}
diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp
index e09c139..2fcdbcc6 100644
--- a/llvm/lib/IR/ConstantRange.cpp
+++ b/llvm/lib/IR/ConstantRange.cpp
@@ -829,6 +829,7 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::IntToPtr:
+ case Instruction::PtrToAddr:
case Instruction::PtrToInt:
case Instruction::AddrSpaceCast:
// Conservatively return getFull set.
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index a3c725b..c7e3113a 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -1567,6 +1567,7 @@ Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
case Instruction::SIToFP:
case Instruction::FPToUI:
case Instruction::FPToSI:
+ case Instruction::PtrToAddr:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::BitCast:
@@ -2223,6 +2224,8 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty,
llvm_unreachable("Invalid cast opcode");
case Instruction::Trunc:
return getTrunc(C, Ty, OnlyIfReduced);
+ case Instruction::PtrToAddr:
+ return getPtrToAddr(C, Ty, OnlyIfReduced);
case Instruction::PtrToInt:
return getPtrToInt(C, Ty, OnlyIfReduced);
case Instruction::IntToPtr:
@@ -2280,6 +2283,20 @@ Constant *ConstantExpr::getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced) {
return getFoldedCast(Instruction::Trunc, C, Ty, OnlyIfReduced);
}
+Constant *ConstantExpr::getPtrToAddr(Constant *C, Type *DstTy,
+ bool OnlyIfReduced) {
+ assert(C->getType()->isPtrOrPtrVectorTy() &&
+ "PtrToAddr source must be pointer or pointer vector");
+ assert(DstTy->isIntOrIntVectorTy() &&
+ "PtrToAddr destination must be integer or integer vector");
+ assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
+ if (isa<VectorType>(C->getType()))
+ assert(cast<VectorType>(C->getType())->getElementCount() ==
+ cast<VectorType>(DstTy)->getElementCount() &&
+ "Invalid cast between a different number of vector elements");
+ return getFoldedCast(Instruction::PtrToAddr, C, DstTy, OnlyIfReduced);
+}
+
Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy,
bool OnlyIfReduced) {
assert(C->getType()->isPtrOrPtrVectorTy() &&
@@ -2435,6 +2452,7 @@ bool ConstantExpr::isDesirableCastOp(unsigned Opcode) {
case Instruction::FPToSI:
return false;
case Instruction::Trunc:
+ case Instruction::PtrToAddr:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::BitCast:
@@ -2457,6 +2475,7 @@ bool ConstantExpr::isSupportedCastOp(unsigned Opcode) {
case Instruction::FPToSI:
return false;
case Instruction::Trunc:
+ case Instruction::PtrToAddr:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::BitCast:
@@ -3401,6 +3420,7 @@ Instruction *ConstantExpr::getAsInstruction() const {
switch (getOpcode()) {
case Instruction::Trunc:
+ case Instruction::PtrToAddr:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::BitCast:
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index f1d4549..96065ed 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -57,15 +57,9 @@ DebugVariable::DebugVariable(const DbgVariableRecord *DVR)
DILocation::DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
unsigned Column, uint64_t AtomGroup, uint8_t AtomRank,
ArrayRef<Metadata *> MDs, bool ImplicitCode)
- : MDNode(C, DILocationKind, Storage, MDs)
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
- ,
- AtomGroup(AtomGroup), AtomRank(AtomRank)
-#endif
-{
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
+ : MDNode(C, DILocationKind, Storage, MDs), AtomGroup(AtomGroup),
+ AtomRank(AtomRank) {
assert(AtomRank <= 7 && "AtomRank number should fit in 3 bits");
-#endif
if (AtomGroup)
C.updateDILocationAtomGroupWaterline(AtomGroup + 1);
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index 7b799c7..11d33e2 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -404,6 +404,7 @@ findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases,
return findBaseObject(CE->getOperand(0), Aliases, Op);
}
case Instruction::IntToPtr:
+ case Instruction::PtrToAddr:
case Instruction::PtrToInt:
case Instruction::BitCast:
case Instruction::GetElementPtr:
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index b7cd12a..4540268 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -817,6 +817,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case UIToFP: return "uitofp";
case SIToFP: return "sitofp";
case IntToPtr: return "inttoptr";
+ case PtrToAddr: return "ptrtoaddr";
case PtrToInt: return "ptrtoint";
case BitCast: return "bitcast";
case AddrSpaceCast: return "addrspacecast";
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index b896382..a1751c0 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -2798,6 +2798,7 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode,
return false;
case Instruction::BitCast:
return true; // BitCast never modifies bits.
+ case Instruction::PtrToAddr:
case Instruction::PtrToInt:
return DL.getIntPtrType(SrcTy)->getScalarSizeInBits() ==
DestTy->getScalarSizeInBits();
@@ -2855,26 +2856,29 @@ unsigned CastInst::isEliminableCastPair(
// same reason.
const unsigned numCastOps =
Instruction::CastOpsEnd - Instruction::CastOpsBegin;
+ // clang-format off
static const uint8_t CastResults[numCastOps][numCastOps] = {
- // T F F U S F F P I B A -+
- // R Z S P P I I T P 2 N T S |
- // U E E 2 2 2 2 R E I T C C +- secondOp
- // N X X U S F F N X N 2 V V |
- // C T T I I P P C T T P T T -+
- { 1, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // Trunc -+
- { 8, 1, 9,99,99, 2,17,99,99,99, 2, 3, 0}, // ZExt |
- { 8, 0, 1,99,99, 0, 2,99,99,99, 0, 3, 0}, // SExt |
- { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToUI |
- { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToSI |
- { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // UIToFP +- firstOp
- { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // SIToFP |
- { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // FPTrunc |
- { 99,99,99, 2, 2,99,99, 8, 2,99,99, 4, 0}, // FPExt |
- { 1, 0, 0,99,99, 0, 0,99,99,99, 7, 3, 0}, // PtrToInt |
- { 99,99,99,99,99,99,99,99,99,11,99,15, 0}, // IntToPtr |
- { 5, 5, 5, 0, 0, 5, 5, 0, 0,16, 5, 1,14}, // BitCast |
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+
+ // T F F U S F F P P I B A -+
+ // R Z S P P I I T P 2 2 N T S |
+ // U E E 2 2 2 2 R E I A T C C +- secondOp
+ // N X X U S F F N X N D 2 V V |
+ // C T T I I P P C T T R P T T -+
+ { 1, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // Trunc -+
+ { 8, 1, 9,99,99, 2,17,99,99,99,99, 2, 3, 0}, // ZExt |
+ { 8, 0, 1,99,99, 0, 2,99,99,99,99, 0, 3, 0}, // SExt |
+ { 0, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // FPToUI |
+ { 0, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // FPToSI |
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99,99, 4, 0}, // UIToFP +- firstOp
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99,99, 4, 0}, // SIToFP |
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99,99, 4, 0}, // FPTrunc |
+ { 99,99,99, 2, 2,99,99, 8, 2,99,99,99, 4, 0}, // FPExt |
+ { 1, 0, 0,99,99, 0, 0,99,99,99,99, 7, 3, 0}, // PtrToInt |
+ { 1, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // PtrToAddr |
+ { 99,99,99,99,99,99,99,99,99,11,99,99,15, 0}, // IntToPtr |
+ { 5, 5, 5, 0, 0, 5, 5, 0, 0,16,16, 5, 1,14}, // BitCast |
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+
};
+ // clang-format on
// TODO: This logic could be encoded into the table above and handled in the
// switch below.
@@ -3046,6 +3050,7 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
case SIToFP: return new SIToFPInst (S, Ty, Name, InsertBefore);
case FPToUI: return new FPToUIInst (S, Ty, Name, InsertBefore);
case FPToSI: return new FPToSIInst (S, Ty, Name, InsertBefore);
+ case PtrToAddr: return new PtrToAddrInst (S, Ty, Name, InsertBefore);
case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore);
case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore);
case BitCast:
@@ -3347,6 +3352,7 @@ CastInst::castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy) {
case Instruction::FPToSI:
return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() &&
SrcEC == DstEC;
+ case Instruction::PtrToAddr:
case Instruction::PtrToInt:
if (SrcEC != DstEC)
return false;
@@ -3460,6 +3466,12 @@ PtrToIntInst::PtrToIntInst(Value *S, Type *Ty, const Twine &Name,
assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
}
+PtrToAddrInst::PtrToAddrInst(Value *S, Type *Ty, const Twine &Name,
+ InsertPosition InsertBefore)
+ : CastInst(Ty, PtrToAddr, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToAddr");
+}
+
IntToPtrInst::IntToPtrInst(Value *S, Type *Ty, const Twine &Name,
InsertPosition InsertBefore)
: CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
@@ -4427,6 +4439,10 @@ PtrToIntInst *PtrToIntInst::cloneImpl() const {
return new PtrToIntInst(getOperand(0), getType());
}
+PtrToAddrInst *PtrToAddrInst::cloneImpl() const {
+ return new PtrToAddrInst(getOperand(0), getType());
+}
+
IntToPtrInst *IntToPtrInst::cloneImpl() const {
return new IntToPtrInst(getOperand(0), getType());
}
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index aa2a60e..e03f993 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -312,10 +312,8 @@ template <> struct MDNodeKeyImpl<MDTuple> : MDNodeOpsKey {
template <> struct MDNodeKeyImpl<DILocation> {
Metadata *Scope;
Metadata *InlinedAt;
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
uint64_t AtomGroup : 61;
uint64_t AtomRank : 3;
-#endif
unsigned Line;
uint16_t Column;
bool ImplicitCode;
@@ -323,36 +321,24 @@ template <> struct MDNodeKeyImpl<DILocation> {
MDNodeKeyImpl(unsigned Line, uint16_t Column, Metadata *Scope,
Metadata *InlinedAt, bool ImplicitCode, uint64_t AtomGroup,
uint8_t AtomRank)
- : Scope(Scope), InlinedAt(InlinedAt),
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
- AtomGroup(AtomGroup), AtomRank(AtomRank),
-#endif
- Line(Line), Column(Column), ImplicitCode(ImplicitCode) {
- }
+ : Scope(Scope), InlinedAt(InlinedAt), AtomGroup(AtomGroup),
+ AtomRank(AtomRank), Line(Line), Column(Column),
+ ImplicitCode(ImplicitCode) {}
MDNodeKeyImpl(const DILocation *L)
: Scope(L->getRawScope()), InlinedAt(L->getRawInlinedAt()),
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
AtomGroup(L->getAtomGroup()), AtomRank(L->getAtomRank()),
-#endif
Line(L->getLine()), Column(L->getColumn()),
- ImplicitCode(L->isImplicitCode()) {
- }
+ ImplicitCode(L->isImplicitCode()) {}
bool isKeyOf(const DILocation *RHS) const {
return Line == RHS->getLine() && Column == RHS->getColumn() &&
Scope == RHS->getRawScope() && InlinedAt == RHS->getRawInlinedAt() &&
- ImplicitCode == RHS->isImplicitCode()
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
- && AtomGroup == RHS->getAtomGroup() &&
- AtomRank == RHS->getAtomRank();
-#else
- ;
-#endif
+ ImplicitCode == RHS->isImplicitCode() &&
+ AtomGroup == RHS->getAtomGroup() && AtomRank == RHS->getAtomRank();
}
unsigned getHashValue() const {
-#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
// Hashing AtomGroup and AtomRank substantially impacts performance whether
// Key Instructions is enabled or not. We can't detect whether it's enabled
// here cheaply; avoiding hashing zero values is a good approximation. This
@@ -363,7 +349,6 @@ template <> struct MDNodeKeyImpl<DILocation> {
if (AtomGroup || AtomRank)
return hash_combine(Line, Column, Scope, InlinedAt, ImplicitCode,
AtomGroup, (uint8_t)AtomRank);
-#endif
return hash_combine(Line, Column, Scope, InlinedAt, ImplicitCode);
}
};
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index 129ca4a..5928c89 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -747,34 +747,28 @@ const Value *Value::stripAndAccumulateConstantOffsets(
// means when we construct GEPOffset, we need to use the size
// of GEP's pointer type rather than the size of the original
// pointer type.
- unsigned CurBitWidth = DL.getIndexTypeSizeInBits(V->getType());
- if (CurBitWidth == BitWidth) {
- if (!GEP->accumulateConstantOffset(DL, Offset, ExternalAnalysis))
- return V;
- } else {
- APInt GEPOffset(CurBitWidth, 0);
- if (!GEP->accumulateConstantOffset(DL, GEPOffset, ExternalAnalysis))
- return V;
+ APInt GEPOffset(DL.getIndexTypeSizeInBits(V->getType()), 0);
+ if (!GEP->accumulateConstantOffset(DL, GEPOffset, ExternalAnalysis))
+ return V;
- // Stop traversal if the pointer offset wouldn't fit in the bit-width
- // provided by the Offset argument. This can happen due to AddrSpaceCast
- // stripping.
- if (GEPOffset.getSignificantBits() > BitWidth)
- return V;
+ // Stop traversal if the pointer offset wouldn't fit in the bit-width
+ // provided by the Offset argument. This can happen due to AddrSpaceCast
+ // stripping.
+ if (GEPOffset.getSignificantBits() > BitWidth)
+ return V;
- // External Analysis can return a result higher/lower than the value
- // represents. We need to detect overflow/underflow.
- APInt GEPOffsetST = GEPOffset.sextOrTrunc(BitWidth);
- if (!ExternalAnalysis) {
- Offset += GEPOffsetST;
- } else {
- bool Overflow = false;
- APInt OldOffset = Offset;
- Offset = Offset.sadd_ov(GEPOffsetST, Overflow);
- if (Overflow) {
- Offset = OldOffset;
- return V;
- }
+ // External Analysis can return a result higher/lower than the value
+ // represents. We need to detect overflow/underflow.
+ APInt GEPOffsetST = GEPOffset.sextOrTrunc(BitWidth);
+ if (!ExternalAnalysis) {
+ Offset += GEPOffsetST;
+ } else {
+ bool Overflow = false;
+ APInt OldOffset = Offset;
+ Offset = Offset.sadd_ov(GEPOffsetST, Overflow);
+ if (Overflow) {
+ Offset = OldOffset;
+ return V;
}
}
V = GEP->getPointerOperand();
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index f5dcb5e..1d3c379 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -566,6 +566,8 @@ private:
void visitUIToFPInst(UIToFPInst &I);
void visitSIToFPInst(SIToFPInst &I);
void visitIntToPtrInst(IntToPtrInst &I);
+ void checkPtrToAddr(Type *SrcTy, Type *DestTy, const Value &V);
+ void visitPtrToAddrInst(PtrToAddrInst &I);
void visitPtrToIntInst(PtrToIntInst &I);
void visitBitCastInst(BitCastInst &I);
void visitAddrSpaceCastInst(AddrSpaceCastInst &I);
@@ -834,6 +836,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
&GV);
Check(GV.getInitializer()->getType()->isSized(),
"Global variable initializer must be sized", &GV);
+ visitConstantExprsRecursively(GV.getInitializer());
// If the global has common linkage, it must have a zero initializer and
// cannot be constant.
if (GV.hasCommonLinkage()) {
@@ -2610,6 +2613,8 @@ void Verifier::visitConstantExpr(const ConstantExpr *CE) {
Check(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0),
CE->getType()),
"Invalid bitcast", CE);
+ else if (CE->getOpcode() == Instruction::PtrToAddr)
+ checkPtrToAddr(CE->getOperand(0)->getType(), CE->getType(), *CE);
}
void Verifier::visitConstantPtrAuth(const ConstantPtrAuth *CPA) {
@@ -3532,6 +3537,28 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) {
visitInstruction(I);
}
+void Verifier::checkPtrToAddr(Type *SrcTy, Type *DestTy, const Value &V) {
+ Check(SrcTy->isPtrOrPtrVectorTy(), "PtrToAddr source must be pointer", V);
+ Check(DestTy->isIntOrIntVectorTy(), "PtrToAddr result must be integral", V);
+ Check(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToAddr type mismatch",
+ V);
+
+ if (SrcTy->isVectorTy()) {
+ auto *VSrc = cast<VectorType>(SrcTy);
+ auto *VDest = cast<VectorType>(DestTy);
+ Check(VSrc->getElementCount() == VDest->getElementCount(),
+ "PtrToAddr vector length mismatch", V);
+ }
+
+ Type *AddrTy = DL.getAddressType(SrcTy);
+ Check(AddrTy == DestTy, "PtrToAddr result must be address width", V);
+}
+
+void Verifier::visitPtrToAddrInst(PtrToAddrInst &I) {
+ checkPtrToAddr(I.getOperand(0)->getType(), I.getType(), I);
+ visitInstruction(I);
+}
+
void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
// Get the source and destination types
Type *SrcTy = I.getOperand(0)->getType();
@@ -3547,7 +3574,7 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
auto *VSrc = cast<VectorType>(SrcTy);
auto *VDest = cast<VectorType>(DestTy);
Check(VSrc->getElementCount() == VDest->getElementCount(),
- "PtrToInt Vector width mismatch", &I);
+ "PtrToInt Vector length mismatch", &I);
}
visitInstruction(I);
@@ -3567,7 +3594,7 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
auto *VSrc = cast<VectorType>(SrcTy);
auto *VDest = cast<VectorType>(DestTy);
Check(VSrc->getElementCount() == VDest->getElementCount(),
- "IntToPtr Vector width mismatch", &I);
+ "IntToPtr Vector length mismatch", &I);
}
visitInstruction(I);
}
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 7ca26aa..df807fc 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -331,61 +331,34 @@ void InstrProfWriter::addDataAccessProfData(
DataAccessProfileData = std::move(DataAccessProfDataIn);
}
-void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
- assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
- assert(!Trace.FunctionNameRefs.empty());
- if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {
- // Simply append the trace if we have not yet hit our reservoir size limit.
- TemporalProfTraces.push_back(std::move(Trace));
- } else {
- // Otherwise, replace a random trace in the stream.
- std::uniform_int_distribution<uint64_t> Distribution(
- 0, TemporalProfTraceStreamSize);
- uint64_t RandomIndex = Distribution(RNG);
- if (RandomIndex < TemporalProfTraces.size())
- TemporalProfTraces[RandomIndex] = std::move(Trace);
- }
- ++TemporalProfTraceStreamSize;
-}
-
void InstrProfWriter::addTemporalProfileTraces(
SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
+ if (TemporalProfTraces.size() > TemporalProfTraceReservoirSize)
+ TemporalProfTraces.truncate(TemporalProfTraceReservoirSize);
for (auto &Trace : SrcTraces)
if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });
- // Assume that the source has the same reservoir size as the destination to
- // avoid needing to record it in the indexed profile format.
- bool IsDestSampled =
- (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);
- bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);
- if (!IsDestSampled && IsSrcSampled) {
- // If one of the traces are sampled, ensure that it belongs to Dest.
- std::swap(TemporalProfTraces, SrcTraces);
- std::swap(TemporalProfTraceStreamSize, SrcStreamSize);
- std::swap(IsDestSampled, IsSrcSampled);
- }
- if (!IsSrcSampled) {
- // If the source stream is not sampled, we add each source trace normally.
- for (auto &Trace : SrcTraces)
- addTemporalProfileTrace(std::move(Trace));
+ // If there are no source traces, it is probably because
+ // --temporal-profile-max-trace-length=0 was set to deliberately remove all
+ // traces. In that case, we do not want to increase the stream size.
+ if (SrcTraces.empty())
return;
- }
- // Otherwise, we find the traces that would have been removed if we added
- // the whole source stream.
- SmallSetVector<uint64_t, 8> IndicesToReplace;
- for (uint64_t I = 0; I < SrcStreamSize; I++) {
- std::uniform_int_distribution<uint64_t> Distribution(
- 0, TemporalProfTraceStreamSize);
+ // Add traces until our reservoir is full or we run out of source traces
+ auto SrcTraceIt = SrcTraces.begin();
+ while (TemporalProfTraces.size() < TemporalProfTraceReservoirSize &&
+ SrcTraceIt < SrcTraces.end())
+ TemporalProfTraces.push_back(*SrcTraceIt++);
+ // If the reservoir is full and source traces remain, sample the rest of the source stream.
+ llvm::shuffle(SrcTraceIt, SrcTraces.end(), RNG);
+ for (uint64_t I = TemporalProfTraces.size();
+ I < SrcStreamSize && SrcTraceIt < SrcTraces.end(); I++) {
+ std::uniform_int_distribution<uint64_t> Distribution(0, I);
uint64_t RandomIndex = Distribution(RNG);
if (RandomIndex < TemporalProfTraces.size())
- IndicesToReplace.insert(RandomIndex);
- ++TemporalProfTraceStreamSize;
+ TemporalProfTraces[RandomIndex] = *SrcTraceIt++;
}
- // Then we insert a random sample of the source traces.
- llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);
- for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))
- TemporalProfTraces[Index] = std::move(Trace);
+ TemporalProfTraceStreamSize += SrcStreamSize;
}
void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp
index fe34037..70ac68a 100644
--- a/llvm/lib/SandboxIR/Context.cpp
+++ b/llvm/lib/SandboxIR/Context.cpp
@@ -256,6 +256,7 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) {
case llvm::Instruction::FPToUI:
case llvm::Instruction::FPToSI:
case llvm::Instruction::FPExt:
+ case llvm::Instruction::PtrToAddr:
case llvm::Instruction::PtrToInt:
case llvm::Instruction::IntToPtr:
case llvm::Instruction::SIToFP:
diff --git a/llvm/lib/SandboxIR/Instruction.cpp b/llvm/lib/SandboxIR/Instruction.cpp
index 956047c..1a81d18 100644
--- a/llvm/lib/SandboxIR/Instruction.cpp
+++ b/llvm/lib/SandboxIR/Instruction.cpp
@@ -1007,6 +1007,9 @@ static llvm::Instruction::CastOps getLLVMCastOp(Instruction::Opcode Opc) {
return static_cast<llvm::Instruction::CastOps>(llvm::Instruction::FPToSI);
case Instruction::Opcode::FPExt:
return static_cast<llvm::Instruction::CastOps>(llvm::Instruction::FPExt);
+ case Instruction::Opcode::PtrToAddr:
+ return static_cast<llvm::Instruction::CastOps>(
+ llvm::Instruction::PtrToAddr);
case Instruction::Opcode::PtrToInt:
return static_cast<llvm::Instruction::CastOps>(llvm::Instruction::PtrToInt);
case Instruction::Opcode::IntToPtr:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e4aa8b8..e63b937 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1844,6 +1844,17 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
/*IsStore*/ true,
/*IsUnitStrided*/ false, /*UsePtrVal*/ true);
+ case Intrinsic::riscv_sseg2_store_mask:
+ case Intrinsic::riscv_sseg3_store_mask:
+ case Intrinsic::riscv_sseg4_store_mask:
+ case Intrinsic::riscv_sseg5_store_mask:
+ case Intrinsic::riscv_sseg6_store_mask:
+ case Intrinsic::riscv_sseg7_store_mask:
+ case Intrinsic::riscv_sseg8_store_mask:
+ // Operands are (vec, ..., vec, ptr, stride, mask, vl)
+ return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
+ /*IsStore*/ true,
+ /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
case Intrinsic::riscv_vlm:
return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
/*IsStore*/ false,
@@ -11084,69 +11095,118 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
-SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
- SelectionDAG &DAG) const {
- unsigned IntNo = Op.getConstantOperandVal(1);
+static SDValue
+lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
+ const RISCVSubtarget &Subtarget,
+ SelectionDAG &DAG) {
+ bool IsStrided;
switch (IntNo) {
- default:
- break;
case Intrinsic::riscv_seg2_store_mask:
case Intrinsic::riscv_seg3_store_mask:
case Intrinsic::riscv_seg4_store_mask:
case Intrinsic::riscv_seg5_store_mask:
case Intrinsic::riscv_seg6_store_mask:
case Intrinsic::riscv_seg7_store_mask:
- case Intrinsic::riscv_seg8_store_mask: {
- SDLoc DL(Op);
- static const Intrinsic::ID VssegInts[] = {
- Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
- Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
- Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
- Intrinsic::riscv_vsseg8_mask};
+ case Intrinsic::riscv_seg8_store_mask:
+ IsStrided = false;
+ break;
+ case Intrinsic::riscv_sseg2_store_mask:
+ case Intrinsic::riscv_sseg3_store_mask:
+ case Intrinsic::riscv_sseg4_store_mask:
+ case Intrinsic::riscv_sseg5_store_mask:
+ case Intrinsic::riscv_sseg6_store_mask:
+ case Intrinsic::riscv_sseg7_store_mask:
+ case Intrinsic::riscv_sseg8_store_mask:
+ IsStrided = true;
+ break;
+ default:
+ llvm_unreachable("unexpected intrinsic ID");
+ }
- // Operands: (chain, int_id, vec*, ptr, mask, vl)
- unsigned NF = Op->getNumOperands() - 5;
- assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
- MVT XLenVT = Subtarget.getXLenVT();
- MVT VT = Op->getOperand(2).getSimpleValueType();
- MVT ContainerVT = getContainerForFixedLengthVector(VT);
- unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
- ContainerVT.getScalarSizeInBits();
- EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
+ SDLoc DL(Op);
+ static const Intrinsic::ID VssegInts[] = {
+ Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
+ Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
+ Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
+ Intrinsic::riscv_vsseg8_mask};
+ static const Intrinsic::ID VsssegInts[] = {
+ Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
+ Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
+ Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
+ Intrinsic::riscv_vssseg8_mask};
+
+ // Operands: (chain, int_id, vec*, ptr, mask, vl) or
+ // (chain, int_id, vec*, ptr, stride, mask, vl)
+ unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
+ assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT VT = Op->getOperand(2).getSimpleValueType();
+ MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
+ ContainerVT.getScalarSizeInBits();
+ EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
- SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
- SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
- MVT MaskVT = Mask.getSimpleValueType();
- MVT MaskContainerVT =
- ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
- Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+ SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
+ SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
+ MVT MaskVT = Mask.getSimpleValueType();
+ MVT MaskContainerVT =
+ ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
+ Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
- SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
- SDValue Ptr = Op->getOperand(NF + 2);
+ SDValue IntID = DAG.getTargetConstant(
+ IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
+ SDValue Ptr = Op->getOperand(NF + 2);
- auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
+ auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
- SDValue StoredVal = DAG.getUNDEF(VecTupTy);
- for (unsigned i = 0; i < NF; i++)
- StoredVal = DAG.getNode(
- RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
- convertToScalableVector(
- ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
- DAG.getTargetConstant(i, DL, MVT::i32));
+ SDValue StoredVal = DAG.getUNDEF(VecTupTy);
+ for (unsigned i = 0; i < NF; i++)
+ StoredVal = DAG.getNode(
+ RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
+ convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
+ DAG, Subtarget),
+ DAG.getTargetConstant(i, DL, MVT::i32));
+
+ SmallVector<SDValue, 10> Ops = {
+ FixedIntrinsic->getChain(),
+ IntID,
+ StoredVal,
+ Ptr,
+ Mask,
+ VL,
+ DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
+ // Insert the stride operand.
+ if (IsStrided)
+ Ops.insert(std::next(Ops.begin(), 4),
+ Op.getOperand(Op.getNumOperands() - 3));
+
+ return DAG.getMemIntrinsicNode(
+ ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
+ FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
+}
+
+SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntNo = Op.getConstantOperandVal(1);
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::riscv_seg2_store_mask:
+ case Intrinsic::riscv_seg3_store_mask:
+ case Intrinsic::riscv_seg4_store_mask:
+ case Intrinsic::riscv_seg5_store_mask:
+ case Intrinsic::riscv_seg6_store_mask:
+ case Intrinsic::riscv_seg7_store_mask:
+ case Intrinsic::riscv_seg8_store_mask:
+ case Intrinsic::riscv_sseg2_store_mask:
+ case Intrinsic::riscv_sseg3_store_mask:
+ case Intrinsic::riscv_sseg4_store_mask:
+ case Intrinsic::riscv_sseg5_store_mask:
+ case Intrinsic::riscv_sseg6_store_mask:
+ case Intrinsic::riscv_sseg7_store_mask:
+ case Intrinsic::riscv_sseg8_store_mask:
+ return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
- SDValue Ops[] = {
- FixedIntrinsic->getChain(),
- IntID,
- StoredVal,
- Ptr,
- Mask,
- VL,
- DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
-
- return DAG.getMemIntrinsicNode(
- ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
- FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
- }
case Intrinsic::riscv_sf_vc_xv_se:
return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
case Intrinsic::riscv_sf_vc_iv_se:
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
index 5541506..24ebbc3 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
@@ -524,16 +524,33 @@ foreach mx = SchedMxListW in {
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- defm "" : LMULWriteResMX<"WriteVSALUV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSALUX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSALUI", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVAALUV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVAALUX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSMulV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSMulX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftI", [SMX60_VIEU], mx, IsWorstCase>;
+ let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [ConstOneUntilM1ThenDouble<mx>.c] in {
+ defm "" : LMULWriteResMX<"WriteVSALUV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUI", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+
+ // Latency of vsmul: e8/e16 = 4/4/5/8, e32 = 5/5/5/8, e64 = 7/8/16/32
+ // We use the worst-case until we can split the SEW.
+ defvar VSMulLat = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c;
+ // Occupancy of vsmul: e8/e16/e32 = 1/2/4/8, e64 = 4/8/16/32
+ // We use the worst-case until we can split the SEW.
+ defvar VSMulOcc = ConstValueUntilLMULThenDoubleBase<"M1", 1, 4, mx>.c;
+ // TODO: change WriteVSMulV/X to be defined with LMULSEWSchedWrites
+ let Latency = VSMulLat, ReleaseAtCycles = [VSMulOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSMulV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSMulX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+
+ defvar VSShiftLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ defvar VSShiftOcc = ConstOneUntilMF2ThenDouble<mx>.c;
+ let Latency = VSShiftLat, ReleaseAtCycles = [VSShiftOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSShiftV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftI", [SMX60_VIEU], mx, IsWorstCase>;
+ }
}
// 13. Vector Floating-Point Instructions
diff --git a/llvm/lib/Transforms/Scalar/InferAlignment.cpp b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
index 0ddc231..e9bf59c 100644
--- a/llvm/lib/Transforms/Scalar/InferAlignment.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
@@ -58,14 +58,55 @@ bool inferAlignment(Function &F, AssumptionCache &AC, DominatorTree &DT) {
}
// Compute alignment from known bits.
+ auto InferFromKnownBits = [&](Instruction &I, Value *PtrOp) {
+ KnownBits Known = computeKnownBits(PtrOp, DL, &AC, &I, &DT);
+ unsigned TrailZ =
+ std::min(Known.countMinTrailingZeros(), +Value::MaxAlignmentExponent);
+ return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
+ };
+
+ // Propagate alignment between loads and stores that originate from the
+ // same base pointer.
+ DenseMap<Value *, Align> BestBasePointerAligns;
+ auto InferFromBasePointer = [&](Value *PtrOp, Align LoadStoreAlign) {
+ APInt OffsetFromBase(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
+ PtrOp = PtrOp->stripAndAccumulateConstantOffsets(DL, OffsetFromBase, true);
+ // Derive the base pointer alignment from the load/store alignment
+ // and the offset from the base pointer.
+ Align BasePointerAlign =
+ commonAlignment(LoadStoreAlign, OffsetFromBase.getLimitedValue());
+
+ auto [It, Inserted] =
+ BestBasePointerAligns.try_emplace(PtrOp, BasePointerAlign);
+ if (!Inserted) {
+ // If the stored base pointer alignment is better than the
+ // base pointer alignment we derived, we may be able to use it
+ // to improve the load/store alignment. If not, store the
+ // improved base pointer alignment for future iterations.
+ if (It->second > BasePointerAlign) {
+ Align BetterLoadStoreAlign =
+ commonAlignment(It->second, OffsetFromBase.getLimitedValue());
+ return BetterLoadStoreAlign;
+ }
+ It->second = BasePointerAlign;
+ }
+ return LoadStoreAlign;
+ };
+
for (BasicBlock &BB : F) {
+ // We need to reset the map for each block because alignment information
+ // can only be propagated from instruction A to B if A dominates B.
+ // This is because control flow (and exception throwing) could be dependent
+ // on the address (and its alignment) at runtime. Some sort of dominator
+ // tree approach could be better, but doing a simple forward pass through a
+ // single basic block is correct too.
+ BestBasePointerAligns.clear();
+
for (Instruction &I : BB) {
Changed |= tryToImproveAlign(
DL, &I, [&](Value *PtrOp, Align OldAlign, Align PrefAlign) {
- KnownBits Known = computeKnownBits(PtrOp, DL, &AC, &I, &DT);
- unsigned TrailZ = std::min(Known.countMinTrailingZeros(),
- +Value::MaxAlignmentExponent);
- return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
+ return std::max(InferFromKnownBits(I, PtrOp),
+ InferFromBasePointer(PtrOp, OldAlign));
});
}
}
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index fcdb8a9..c68149b 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -263,6 +263,7 @@ static bool isUniformShape(Value *V) {
case llvm::Instruction::FPExt:
return true;
case llvm::Instruction::AddrSpaceCast:
+ case CastInst::PtrToAddr:
case CastInst::PtrToInt:
case CastInst::IntToPtr:
return false;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 39011e7..ec06a21 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12050,7 +12050,8 @@ static InstructionCost canConvertToFMA(ArrayRef<Value *> VL,
for (auto [V, Op] : zip(VL, Operands.front())) {
auto *I = dyn_cast<Instruction>(Op);
if (!I || !I->hasOneUse()) {
- FMACost += TTI.getInstructionCost(cast<Instruction>(V), CostKind);
+ if (auto *OpI = dyn_cast<Instruction>(V))
+ FMACost += TTI.getInstructionCost(OpI, CostKind);
if (I)
FMACost += TTI.getInstructionCost(I, CostKind);
continue;
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
index f32d57f..e414c12 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
@@ -81,6 +81,7 @@ LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes(
case Instruction::Opcode::FPToUI:
case Instruction::Opcode::FPToSI:
case Instruction::Opcode::FPExt:
+ case Instruction::Opcode::PtrToAddr:
case Instruction::Opcode::PtrToInt:
case Instruction::Opcode::IntToPtr:
case Instruction::Opcode::SIToFP: