diff options
Diffstat (limited to 'llvm/utils')
22 files changed, 602 insertions, 224 deletions
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp index 7f90d6b..a280604 100644 --- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp +++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp @@ -242,11 +242,10 @@ public: SmallVector<const Record *, 1024> AllRuntimeLibcallImpls( AllRuntimeLibcallImplsRaw); - // Sort by libcall impl name, not the enum name. This keeps the order - // suitable for using the name table for libcall recognition binary search. - llvm::sort(AllRuntimeLibcallImpls, [](const Record *A, const Record *B) { - return A->getValueAsString("LibCallFuncName") < - B->getValueAsString("LibCallFuncName"); + // Sort by libcall impl name and secondarily by the enum name. + sort(AllRuntimeLibcallImpls, [](const Record *A, const Record *B) { + return std::pair(A->getValueAsString("LibCallFuncName"), A->getName()) < + std::pair(B->getValueAsString("LibCallFuncName"), B->getName()); }); RuntimeLibcallImplDefList.reserve(AllRuntimeLibcallImpls.size()); diff --git a/llvm/utils/TableGen/CompressInstEmitter.cpp b/llvm/utils/TableGen/CompressInstEmitter.cpp index afc892b..89c175b 100644 --- a/llvm/utils/TableGen/CompressInstEmitter.cpp +++ b/llvm/utils/TableGen/CompressInstEmitter.cpp @@ -86,16 +86,22 @@ namespace { class CompressInstEmitter { struct OpData { enum MapKind { Operand, Imm, Reg } Kind; - union { + // Info for an operand. + struct OpndInfo { + // Record from the Dag. + const Record *DagRec; // Operand number mapped to. - unsigned OpNo; + unsigned Idx; + // Tied operand index within the instruction. + int TiedOpIdx; + }; + union { + OpndInfo OpInfo; // Integer immediate value. int64_t ImmVal; // Physical register. const Record *RegRec; }; - // Tied operand index within the instruction. 
- int TiedOpIdx = -1; }; struct ArgData { unsigned DAGOpNo; @@ -217,12 +223,8 @@ void CompressInstEmitter::addDagOperandMapping(const Record *Rec, Inst.Operands.back().MIOperandNo + Inst.Operands.back().MINumOperands; OperandMap.grow(NumMIOperands); - // TiedCount keeps track of the number of operands skipped in Inst - // operands list to get to the corresponding Dag operand. This is - // necessary because the number of operands in Inst might be greater - // than number of operands in the Dag due to how tied operands - // are represented. - unsigned TiedCount = 0; + // Tied operands are not represented in the DAG so we count them separately. + unsigned DAGOpNo = 0; unsigned OpNo = 0; for (const auto &Opnd : Inst.Operands) { int TiedOpIdx = Opnd.getTiedRegister(); @@ -231,15 +233,25 @@ void CompressInstEmitter::addDagOperandMapping(const Record *Rec, // Set the entry in OperandMap for the tied operand we're skipping. OperandMap[OpNo] = OperandMap[TiedOpIdx]; ++OpNo; - ++TiedCount; + + // Source instructions can have at most 1 tied operand. + if (IsSourceInst && (OpNo - DAGOpNo > 1)) + PrintFatalError(Rec->getLoc(), + "Input operands for Inst '" + Inst.TheDef->getName() + + "' and input Dag operand count mismatch"); + continue; } - for (unsigned SubOp = 0; SubOp != Opnd.MINumOperands; ++SubOp, ++OpNo) { - unsigned DAGOpNo = OpNo - TiedCount; + for (unsigned SubOp = 0; SubOp != Opnd.MINumOperands; + ++SubOp, ++OpNo, ++DAGOpNo) { const Record *OpndRec = Opnd.Rec; if (Opnd.MINumOperands > 1) OpndRec = cast<DefInit>(Opnd.MIOperandInfo->getArg(SubOp))->getDef(); + if (DAGOpNo >= Dag->getNumArgs()) + PrintFatalError(Rec->getLoc(), "Inst '" + Inst.TheDef->getName() + + "' and Dag operand count mismatch"); + if (const auto *DI = dyn_cast<DefInit>(Dag->getArg(DAGOpNo))) { if (DI->getDef()->isSubClassOf("Register")) { // Check if the fixed register belongs to the Register class. 
@@ -267,9 +279,34 @@ void CompressInstEmitter::addDagOperandMapping(const Record *Rec, "' in the corresponding instruction operand!"); OperandMap[OpNo].Kind = OpData::Operand; + OperandMap[OpNo].OpInfo.DagRec = DI->getDef(); + OperandMap[OpNo].OpInfo.TiedOpIdx = -1; + + // Create a mapping between the operand name in the Dag (e.g. $rs1) and + // its index in the list of Dag operands and check that operands with + // the same name have the same type. For example in 'C_ADD $rs1, $rs2' + // we generate the mapping $rs1 --> 0, $rs2 ---> 1. If the operand + // appears twice in the same Dag (tied in the compressed instruction), + // we note the previous index in the TiedOpIdx field. + StringRef ArgName = Dag->getArgNameStr(DAGOpNo); + if (ArgName.empty()) + continue; + + if (IsSourceInst) { + auto It = Operands.find(ArgName); + if (It != Operands.end()) { + OperandMap[OpNo].OpInfo.TiedOpIdx = It->getValue().MIOpNo; + if (OperandMap[It->getValue().MIOpNo].OpInfo.DagRec != DI->getDef()) + PrintFatalError(Rec->getLoc(), + "Input Operand '" + ArgName + + "' has a mismatched tied operand!"); + } + } + + Operands[ArgName] = {DAGOpNo, OpNo}; } else if (const auto *II = dyn_cast<IntInit>(Dag->getArg(DAGOpNo))) { // Validate that corresponding instruction operand expects an immediate. - if (OpndRec->isSubClassOf("RegisterClass")) + if (!OpndRec->isSubClassOf("Operand")) PrintFatalError(Rec->getLoc(), "Error in Dag '" + Dag->getAsString() + "' Found immediate: '" + II->getAsString() + @@ -286,69 +323,13 @@ void CompressInstEmitter::addDagOperandMapping(const Record *Rec, } else { llvm_unreachable("Unhandled CompressPat argument type!"); } - - // Create a mapping between the operand name in the Dag (e.g. $rs1) and - // its index in the list of Dag operands and check that operands with the - // same name have the same type. For example in 'C_ADD $rs1, $rs2' we - // generate the mapping $rs1 --> 0, $rs2 ---> 1. 
If the operand appears - // twice in the same Dag (tied in the compressed instruction), we note - // the previous index in the TiedOpIdx field. - StringRef ArgName = Dag->getArgNameStr(DAGOpNo); - if (ArgName.empty()) - continue; - - if (IsSourceInst) { - auto It = Operands.find(ArgName); - if (It != Operands.end()) { - OperandMap[OpNo].TiedOpIdx = It->getValue().MIOpNo; - if (!validateArgsTypes(Dag->getArg(It->getValue().DAGOpNo), - Dag->getArg(DAGOpNo))) - PrintFatalError(Rec->getLoc(), - "Input Operand '" + ArgName + - "' has a mismatched tied operand!"); - } - } - - Operands[ArgName] = {DAGOpNo, OpNo}; } } -} - -// Verify the Dag operand count is enough to build an instruction. -static bool verifyDagOpCount(const CodeGenInstruction &Inst, const DagInit *Dag, - bool IsSource) { - unsigned NumMIOperands = 0; - - unsigned TiedOpCount = 0; - for (const auto &Op : Inst.Operands) { - NumMIOperands += Op.MINumOperands; - if (Op.getTiedRegister() != -1) - TiedOpCount++; - } - if (Dag->getNumArgs() == NumMIOperands) - return true; - - // Source instructions are non compressed instructions and have at most one - // tied operand. - if (IsSource && (TiedOpCount > 1)) - PrintFatalError(Inst.TheDef->getLoc(), - "Input operands for Inst '" + Inst.TheDef->getName() + - "' and input Dag operand count mismatch"); - - // The Dag can't have more arguments than the Instruction. - if (Dag->getNumArgs() > NumMIOperands) - PrintFatalError(Inst.TheDef->getLoc(), - "Inst '" + Inst.TheDef->getName() + - "' and Dag operand count mismatch"); - - // The Instruction might have tied operands so the Dag might have - // a fewer operand count. - if (Dag->getNumArgs() != (NumMIOperands - TiedOpCount)) - PrintFatalError(Inst.TheDef->getLoc(), - "Inst '" + Inst.TheDef->getName() + - "' and Dag operand count mismatch"); - return true; + // We shouldn't have extra Dag operands. 
+ if (DAGOpNo != Dag->getNumArgs()) + PrintFatalError(Rec->getLoc(), "Inst '" + Inst.TheDef->getName() + + "' and Dag operand count mismatch"); } // Check that all names in the source DAG appear in the destionation DAG. @@ -398,8 +379,9 @@ void CompressInstEmitter::createInstOperandMapping( if (DestOperandMap[OpNo].Kind == OpData::Operand) // No need to fill the SourceOperandMap here since it was mapped to // destination operand 'TiedInstOpIdx' in a previous iteration. - LLVM_DEBUG(dbgs() << " " << DestOperandMap[OpNo].OpNo << " ====> " - << OpNo << " Dest operand tied with operand '" + LLVM_DEBUG(dbgs() << " " << DestOperandMap[OpNo].OpInfo.Idx + << " ====> " << OpNo + << " Dest operand tied with operand '" << TiedInstOpIdx << "'\n"); ++OpNo; continue; @@ -424,8 +406,8 @@ void CompressInstEmitter::createInstOperandMapping( "Incorrect operand mapping detected!\n"); unsigned SourceOpNo = SourceOp->getValue().MIOpNo; - DestOperandMap[OpNo].OpNo = SourceOpNo; - SourceOperandMap[SourceOpNo].OpNo = OpNo; + DestOperandMap[OpNo].OpInfo.Idx = SourceOpNo; + SourceOperandMap[SourceOpNo].OpInfo.Idx = OpNo; LLVM_DEBUG(dbgs() << " " << SourceOpNo << " ====> " << OpNo << "\n"); } } @@ -463,7 +445,6 @@ void CompressInstEmitter::evaluateCompressPat(const Record *Rec) { // Checking we are transforming from compressed to uncompressed instructions. const Record *SourceOperator = SourceDag->getOperatorAsDef(Rec->getLoc()); CodeGenInstruction SourceInst(SourceOperator); - verifyDagOpCount(SourceInst, SourceDag, true); // Validate output Dag operands. 
const DagInit *DestDag = Rec->getValueAsDag("Output"); @@ -472,7 +453,6 @@ void CompressInstEmitter::evaluateCompressPat(const Record *Rec) { const Record *DestOperator = DestDag->getOperatorAsDef(Rec->getLoc()); CodeGenInstruction DestInst(DestOperator); - verifyDagOpCount(DestInst, DestDag, false); if (SourceOperator->getValueAsInt("Size") <= DestOperator->getValueAsInt("Size")) @@ -586,8 +566,6 @@ static void printPredicates(ArrayRef<const Record *> Predicates, StringRef Name, static void mergeCondAndCode(raw_ostream &CombinedStream, StringRef CondStr, StringRef CodeStr) { - // Remove first indentation and last '&&'. - CondStr = CondStr.drop_front(8).drop_back(4); CombinedStream.indent(4) << "if (" << CondStr << ") {\n"; CombinedStream << CodeStr; CombinedStream.indent(4) << " return true;\n"; @@ -668,7 +646,7 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, StringRef PrevOp; StringRef CurOp; CaseStream << " switch (MI.getOpcode()) {\n"; - CaseStream << " default: return false;\n"; + CaseStream << " default: return false;\n"; bool CompressOrCheck = EType == EmitterType::Compress || EType == EmitterType::CheckCompress; @@ -681,7 +659,7 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, .str() : ""; - for (auto &CompressPat : CompressPatterns) { + for (const auto &CompressPat : CompressPatterns) { if (EType == EmitterType::Uncompress && CompressPat.IsCompressOnly) continue; @@ -689,23 +667,25 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, std::string CodeString; raw_string_ostream CondStream(CondString); raw_string_ostream CodeStream(CodeString); - CodeGenInstruction &Source = + const CodeGenInstruction &Source = CompressOrCheck ? CompressPat.Source : CompressPat.Dest; - CodeGenInstruction &Dest = + const CodeGenInstruction &Dest = CompressOrCheck ? CompressPat.Dest : CompressPat.Source; - IndexedMap<OpData> SourceOperandMap = CompressOrCheck - ? 
CompressPat.SourceOperandMap - : CompressPat.DestOperandMap; - IndexedMap<OpData> &DestOperandMap = CompressOrCheck - ? CompressPat.DestOperandMap - : CompressPat.SourceOperandMap; + const IndexedMap<OpData> &SourceOperandMap = + CompressOrCheck ? CompressPat.SourceOperandMap + : CompressPat.DestOperandMap; + const IndexedMap<OpData> &DestOperandMap = + CompressOrCheck ? CompressPat.DestOperandMap + : CompressPat.SourceOperandMap; CurOp = Source.TheDef->getName(); // Check current and previous opcode to decide to continue or end a case. if (CurOp != PrevOp) { - if (!PrevOp.empty()) - CaseStream.indent(6) << "break;\n } // case " + PrevOp + "\n"; - CaseStream.indent(4) << "case " + TargetName + "::" + CurOp + ": {\n"; + if (!PrevOp.empty()) { + CaseStream.indent(4) << "break;\n"; + CaseStream.indent(2) << "} // case " + PrevOp + "\n"; + } + CaseStream.indent(2) << "case " + TargetName + "::" + CurOp + ": {\n"; } std::set<std::pair<bool, StringRef>> FeaturesSet; @@ -722,17 +702,18 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, }); getReqFeatures(FeaturesSet, AnyOfFeatureSets, ReqFeatures); + ListSeparator CondSep(" &&\n "); + // Emit checks for all required features. for (auto &Op : FeaturesSet) { StringRef Not = Op.first ? "!" : ""; - CondStream.indent(8) << Not << "STI.getFeatureBits()[" << TargetName - << "::" << Op.second << "]" - << " &&\n"; + CondStream << CondSep << Not << "STI.getFeatureBits()[" << TargetName + << "::" << Op.second << "]"; } // Emit checks for all required feature groups. for (auto &Set : AnyOfFeatureSets) { - CondStream.indent(8) << "("; + CondStream << CondSep << "("; for (auto &Op : Set) { bool IsLast = &Op == &*Set.rbegin(); StringRef Not = Op.first ? "!" : ""; @@ -741,41 +722,43 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, if (!IsLast) CondStream << " || "; } - CondStream << ") &&\n"; + CondStream << ")"; } // Start Source Inst operands validation. 
unsigned OpNo = 0; for (const auto &SourceOperand : Source.Operands) { - if (SourceOperandMap[OpNo].TiedOpIdx != -1) { - if (Source.Operands[OpNo].Rec->isSubClassOf("RegisterClass")) - CondStream.indent(8) - << "(MI.getOperand(" << OpNo << ").isReg()) && (MI.getOperand(" - << SourceOperandMap[OpNo].TiedOpIdx << ").isReg()) &&\n" - << indent(8) << "(MI.getOperand(" << OpNo - << ").getReg() == MI.getOperand(" - << SourceOperandMap[OpNo].TiedOpIdx << ").getReg()) &&\n"; - else - PrintFatalError("Unexpected tied operand types!"); - } for (unsigned SubOp = 0; SubOp != SourceOperand.MINumOperands; ++SubOp) { // Check for fixed immediates\registers in the source instruction. switch (SourceOperandMap[OpNo].Kind) { case OpData::Operand: + if (SourceOperandMap[OpNo].OpInfo.TiedOpIdx != -1) { + if (Source.Operands[OpNo].Rec->isSubClassOf("RegisterClass")) + CondStream << CondSep << "MI.getOperand(" << OpNo + << ").isReg() && MI.getOperand(" + << SourceOperandMap[OpNo].OpInfo.TiedOpIdx + << ").isReg()" << CondSep << "(MI.getOperand(" << OpNo + << ").getReg() == MI.getOperand(" + << SourceOperandMap[OpNo].OpInfo.TiedOpIdx + << ").getReg())"; + else + PrintFatalError("Unexpected tied operand types!"); + } + // We don't need to do anything for source instruction operand checks. 
break; case OpData::Imm: - CondStream.indent(8) - << "(MI.getOperand(" << OpNo << ").isImm()) &&\n" - << " (MI.getOperand(" << OpNo - << ").getImm() == " << SourceOperandMap[OpNo].ImmVal << ") &&\n"; + CondStream << CondSep << "MI.getOperand(" << OpNo << ").isImm()" + << CondSep << "(MI.getOperand(" << OpNo + << ").getImm() == " << SourceOperandMap[OpNo].ImmVal + << ")"; break; case OpData::Reg: { const Record *Reg = SourceOperandMap[OpNo].RegRec; - CondStream.indent(8) << "(MI.getOperand(" << OpNo << ").isReg()) &&\n" - << indent(8) << "(MI.getOperand(" << OpNo - << ").getReg() == " << TargetName - << "::" << Reg->getName() << ") &&\n"; + CondStream << CondSep << "MI.getOperand(" << OpNo << ").isReg()" + << CondSep << "(MI.getOperand(" << OpNo + << ").getReg() == " << TargetName << "::" << Reg->getName() + << ")"; break; } } @@ -799,27 +782,27 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, switch (DestOperandMap[OpNo].Kind) { case OpData::Operand: { - unsigned OpIdx = DestOperandMap[OpNo].OpNo; + unsigned OpIdx = DestOperandMap[OpNo].OpInfo.Idx; + const Record *DagRec = DestOperandMap[OpNo].OpInfo.DagRec; // Check that the operand in the Source instruction fits // the type for the Dest instruction. - if (DestRec->isSubClassOf("RegisterClass") || - DestRec->isSubClassOf("RegisterOperand")) { - auto *ClassRec = DestRec->isSubClassOf("RegisterClass") - ? DestRec - : DestRec->getValueAsDef("RegClass"); + if (DagRec->isSubClassOf("RegisterClass") || + DagRec->isSubClassOf("RegisterOperand")) { + auto *ClassRec = DagRec->isSubClassOf("RegisterClass") + ? DagRec + : DagRec->getValueAsDef("RegClass"); // This is a register operand. Check the register class. // Don't check register class if this is a tied operand, it was done - // for the operand its tied to. + // for the operand it's tied to. 
if (DestOperand.getTiedRegister() == -1) { - CondStream.indent(8) << "MI.getOperand(" << OpIdx << ").isReg()"; + CondStream << CondSep << "MI.getOperand(" << OpIdx << ").isReg()"; if (EType == EmitterType::CheckCompress) CondStream << " && MI.getOperand(" << OpIdx << ").getReg().isPhysical()"; - CondStream << " &&\n" - << indent(8) << TargetName << "MCRegisterClasses[" + CondStream << CondSep << TargetName << "MCRegisterClasses[" << TargetName << "::" << ClassRec->getName() << "RegClassID].contains(MI.getOperand(" << OpIdx - << ").getReg()) &&\n"; + << ").getReg())"; } if (CompressOrUncompress) @@ -829,19 +812,35 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, // Handling immediate operands. if (CompressOrUncompress) { unsigned Entry = getPredicates(MCOpPredicateMap, MCOpPredicates, - DestRec, "MCOperandPredicate"); - CondStream.indent(8) << ValidatorName << "(" - << "MI.getOperand(" << OpIdx << "), STI, " - << Entry << ") &&\n"; + DagRec, "MCOperandPredicate"); + CondStream << CondSep << ValidatorName << "(" + << "MI.getOperand(" << OpIdx << "), STI, " << Entry + << " /* " << DagRec->getName() << " */)"; + // Also check DestRec if different than DagRec. 
+ if (DagRec != DestRec) { + Entry = getPredicates(MCOpPredicateMap, MCOpPredicates, DestRec, + "MCOperandPredicate"); + CondStream << CondSep << ValidatorName << "(" + << "MI.getOperand(" << OpIdx << "), STI, " << Entry + << " /* " << DestRec->getName() << " */)"; + } } else { unsigned Entry = - getPredicates(ImmLeafPredicateMap, ImmLeafPredicates, DestRec, + getPredicates(ImmLeafPredicateMap, ImmLeafPredicates, DagRec, "ImmediateCode"); - CondStream.indent(8) - << "MI.getOperand(" << OpIdx << ").isImm() &&\n"; - CondStream.indent(8) << TargetName << "ValidateMachineOperand(" - << "MI.getOperand(" << OpIdx << "), &STI, " - << Entry << ") &&\n"; + CondStream << CondSep << "MI.getOperand(" << OpIdx << ").isImm()"; + CondStream << CondSep << TargetName << "ValidateMachineOperand(" + << "MI.getOperand(" << OpIdx << "), &STI, " << Entry + << " /* " << DagRec->getName() << " */)"; + if (DagRec != DestRec) { + Entry = getPredicates(ImmLeafPredicateMap, ImmLeafPredicates, + DestRec, "ImmediateCode"); + CondStream << CondSep << "MI.getOperand(" << OpIdx + << ").isImm()"; + CondStream << CondSep << TargetName << "ValidateMachineOperand(" + << "MI.getOperand(" << OpIdx << "), &STI, " << Entry + << " /* " << DestRec->getName() << " */)"; + } } if (CompressOrUncompress) CodeStream.indent(6) @@ -853,19 +852,18 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, if (CompressOrUncompress) { unsigned Entry = getPredicates(MCOpPredicateMap, MCOpPredicates, DestRec, "MCOperandPredicate"); - CondStream.indent(8) - << ValidatorName << "(" - << "MCOperand::createImm(" << DestOperandMap[OpNo].Imm - << "), STI, " << Entry << ") &&\n"; + CondStream << CondSep << ValidatorName << "(" + << "MCOperand::createImm(" << DestOperandMap[OpNo].ImmVal + << "), STI, " << Entry << " /* " << DestRec->getName() + << " */)"; } else { unsigned Entry = getPredicates(ImmLeafPredicateMap, ImmLeafPredicates, DestRec, "ImmediateCode"); - CondStream.indent(8) - << TargetName - << 
"ValidateMachineOperand(MachineOperand::CreateImm(" - << DestOperandMap[OpNo].ImmVal << "), &STI, " << Entry - << ") &&\n"; + CondStream << CondSep << TargetName + << "ValidateMachineOperand(MachineOperand::CreateImm(" + << DestOperandMap[OpNo].ImmVal << "), &STI, " << Entry + << " /* " << DestRec->getName() << " */)"; } if (CompressOrUncompress) CodeStream.indent(6) << "OutInst.addOperand(MCOperand::createImm(" @@ -889,9 +887,10 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, mergeCondAndCode(CaseStream, CondString, CodeString); PrevOp = CurOp; } - Func << CaseString << "\n"; + Func << CaseString; + Func.indent(4) << "break;\n"; // Close brace for the last case. - Func.indent(4) << "} // case " << CurOp << "\n"; + Func.indent(2) << "} // case " << CurOp << "\n"; Func.indent(2) << "} // switch\n"; Func.indent(2) << "return false;\n}\n"; diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index f028fcd..6f72b51 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -248,46 +248,50 @@ void InstrInfoEmitter::emitOperandNameMappings( /// scan of the instructions below. // Map of operand names to their ID. - std::map<StringRef, unsigned> OperandNameToID; - // Map from operand name enum value -> ID. - std::vector<unsigned> OperandEnumToID; + MapVector<StringRef, unsigned> OperandNameToID; - /// The keys of this map is a map which have OpName ID values as their keys - /// and instruction operand indices as their values. The values of this map - /// are lists of instruction names. This map helps to unique entries among + /// A key in this map is a vector mapping OpName ID values to instruction + /// operand indices or -1 (but without any trailing -1 values which will be + /// added later). The corresponding value in this map is the index of that row + /// in the emitted OperandMap table. 
This map helps to unique entries among /// instructions that have identical OpName -> Operand index mapping. - std::map<std::map<unsigned, unsigned>, std::vector<StringRef>> OperandMap; + MapVector<SmallVector<int>, unsigned> OperandMap; // Max operand index seen. unsigned MaxOperandNo = 0; // Fixed/Predefined instructions do not have UseNamedOperandTable enabled, so - // we can just skip them. + // add a dummy map entry for them. + OperandMap.try_emplace({}, 0); + unsigned FirstTargetVal = TargetInstructions.front()->EnumVal; + SmallVector<unsigned> InstructionIndex(FirstTargetVal, 0); for (const CodeGenInstruction *Inst : TargetInstructions) { - if (!Inst->TheDef->getValueAsBit("UseNamedOperandTable")) + if (!Inst->TheDef->getValueAsBit("UseNamedOperandTable")) { + InstructionIndex.push_back(0); continue; - std::map<unsigned, unsigned> OpList; + } + SmallVector<int> OpList; for (const auto &Info : Inst->Operands) { unsigned ID = OperandNameToID.try_emplace(Info.Name, OperandNameToID.size()) .first->second; + OpList.resize(std::max((unsigned)OpList.size(), ID + 1), -1); OpList[ID] = Info.MIOperandNo; MaxOperandNo = std::max(MaxOperandNo, Info.MIOperandNo); } - OperandMap[OpList].push_back(Inst->TheDef->getName()); + auto [It, Inserted] = + OperandMap.try_emplace(std::move(OpList), OperandMap.size()); + InstructionIndex.push_back(It->second); } const size_t NumOperandNames = OperandNameToID.size(); - OperandEnumToID.reserve(NumOperandNames); - for (const auto &Op : OperandNameToID) - OperandEnumToID.push_back(Op.second); OS << "#ifdef GET_INSTRINFO_OPERAND_ENUM\n"; OS << "#undef GET_INSTRINFO_OPERAND_ENUM\n"; OS << "namespace llvm::" << Namespace << " {\n"; OS << "enum class OpName {\n"; - for (const auto &[I, Op] : enumerate(OperandNameToID)) - OS << " " << Op.first << " = " << I << ",\n"; + for (const auto &[Op, I] : OperandNameToID) + OS << " " << Op << " = " << I << ",\n"; OS << " NUM_OPERAND_NAMES = " << NumOperandNames << ",\n"; OS << "}; // enum class 
OpName\n\n"; OS << "LLVM_READONLY\n"; @@ -307,28 +311,22 @@ void InstrInfoEmitter::emitOperandNameMappings( StringRef Type = MaxOperandNo <= INT8_MAX ? "int8_t" : "int16_t"; OS << " static constexpr " << Type << " OperandMap[][" << NumOperandNames << "] = {\n"; - for (const auto &Entry : OperandMap) { - const std::map<unsigned, unsigned> &OpList = Entry.first; - + for (const auto &[OpList, _] : OperandMap) { // Emit a row of the OperandMap table. OS << " {"; - for (unsigned ID : OperandEnumToID) { - auto Iter = OpList.find(ID); - OS << (Iter != OpList.end() ? (int)Iter->second : -1) << ", "; - } + for (unsigned ID = 0; ID < NumOperandNames; ++ID) + OS << (ID < OpList.size() ? OpList[ID] : -1) << ", "; OS << "},\n"; } OS << " };\n"; - OS << " switch(Opcode) {\n"; - for (const auto &[TableIndex, Entry] : enumerate(OperandMap)) { - for (StringRef Name : Entry.second) - OS << " case " << Namespace << "::" << Name << ":\n"; - OS << " return OperandMap[" << TableIndex - << "][static_cast<unsigned>(Name)];\n"; - } - OS << " default: return -1;\n"; - OS << " }\n"; + Type = OperandMap.size() <= UINT8_MAX + 1 ? "uint8_t" : "uint16_t"; + OS << " static constexpr " << Type << " InstructionIndex[] = {"; + for (auto [TableIndex, Entry] : enumerate(InstructionIndex)) + OS << (TableIndex % 16 == 0 ? 
"\n " : " ") << Entry << ','; + OS << "\n };\n"; + + OS << " return OperandMap[InstructionIndex[Opcode]][(unsigned)Name];\n"; } else { // There are no operands, so no need to emit anything OS << " return -1;\n"; diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py index 3754aa2..98b4b5d 100644 --- a/llvm/utils/UpdateTestChecks/asm.py +++ b/llvm/utils/UpdateTestChecks/asm.py @@ -165,12 +165,10 @@ ASM_FUNCTION_AARCH64_DARWIN_RE = re.compile( ) ASM_FUNCTION_ARM_DARWIN_RE = re.compile( - r"@[ \t]--[ \t]Begin[ \t]function[ \t](?P<func>[^ \t]+?)\n" - r"^[ \t]*\.globl[ \t]*_(?P=func)[ \t]*" + r"^[ \t]*\.globl[ \t]*_(?P<func>[^ \t]+)[ \t]*\@[ \t]*--[ \t]Begin[ \t]function[ \t](?P=func)\n" r"(?P<directives>.*?)" - r"^_(?P=func):\n[ \t]*" - r"(?P<body>.*?)" - r"^[ \t]*@[ \t]--[ \t]End[ \t]function", + r"^_(?P=func):.*?\n" + r"(?P<body>.*?)(?=^[ \t]*@[ \t]--[ \t]End[ \t]function)", flags=(re.M | re.S), ) @@ -593,6 +591,7 @@ def get_run_handler(triple): "riscv64": (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE), "lanai": (scrub_asm_lanai, ASM_FUNCTION_LANAI_RE), "sparc": (scrub_asm_sparc, ASM_FUNCTION_SPARC_RE), + "spirv": (scrub_asm_spirv, ASM_FUNCTION_SPIRV_RE), "spirv32": (scrub_asm_spirv, ASM_FUNCTION_SPIRV_RE), "spirv64": (scrub_asm_spirv, ASM_FUNCTION_SPIRV_RE), "s390x": (scrub_asm_systemz, ASM_FUNCTION_SYSTEMZ_RE), diff --git a/llvm/utils/gn/build/write_vcsrevision.py b/llvm/utils/gn/build/write_vcsrevision.py index afd6aae..3a627ee 100755 --- a/llvm/utils/gn/build/write_vcsrevision.py +++ b/llvm/utils/gn/build/write_vcsrevision.py @@ -6,22 +6,13 @@ import argparse import os import subprocess import sys +import shutil THIS_DIR = os.path.abspath(os.path.dirname(__file__)) LLVM_DIR = os.path.dirname(os.path.dirname(os.path.dirname(THIS_DIR))) -def which(program): - # distutils.spawn.which() doesn't find .bat files, - # https://bugs.python.org/issue2200 - for path in os.environ["PATH"].split(os.pathsep): - candidate = os.path.join(path, program) - 
if os.path.isfile(candidate) and os.access(candidate, os.X_OK): - return candidate - return None - - def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( @@ -46,11 +37,11 @@ def main(): vcsrevision_contents = "" if args.write_git_rev: - git, use_shell = which("git"), False + git, use_shell = shutil.which("git"), False if not git: - git = which("git.exe") + git = shutil.which("git.exe") if not git: - git, use_shell = which("git.bat"), True + git, use_shell = shutil.which("git.bat"), True git_dir = ( subprocess.check_output( [git, "rev-parse", "--git-dir"], cwd=LLVM_DIR, shell=use_shell diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/llvm/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/llvm/BUILD.gn index ef804af..c7cccc4 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/llvm/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/llvm/BUILD.gn @@ -20,5 +20,6 @@ static_library("llvm") { "PreferRegisterOverUnsignedCheck.cpp", "PreferStaticOverAnonymousNamespaceCheck.cpp", "TwineLocalCheck.cpp", + "UseNewMLIROpBuilderCheck.cpp", ] } diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 05ac4c3..d270686 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -1038,6 +1038,7 @@ if (current_toolchain == default_toolchain) { "__format/enable_insertable.h", "__format/escaped_output_table.h", "__format/extended_grapheme_cluster_table.h", + "__format/fmt_pair_like.h", "__format/format_arg.h", "__format/format_arg_store.h", "__format/format_args.h", @@ -1192,6 +1193,7 @@ if (current_toolchain == default_toolchain) { "__locale_dir/time.h", "__locale_dir/wbuffer_convert.h", "__locale_dir/wstring_convert.h", + "__log_hardening_failure", "__math/abs.h", "__math/copysign.h", "__math/error_functions.h", diff --git a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn 
b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn index b9e8d07..327a8ed 100644 --- a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn @@ -317,7 +317,10 @@ if (libcxx_enable_experimental) { static_library("cxx_experimental") { output_dir = runtimes_dir output_name = "c++experimental" - sources = [ "experimental/keep.cpp" ] + sources = [ + "experimental/keep.cpp", + "experimental/log_hardening_failure.cpp", + ] if (libcxx_enable_filesystem && libcxx_enable_time_zone_database) { sources += [ # TODO TZDB The exception could be moved in chrono once the TZDB library diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Linux/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Linux/BUILD.gn index 978f186..dcac0ca 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Linux/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Linux/BUILD.gn @@ -25,6 +25,7 @@ static_library("Linux") { "NativeRegisterContextLinux.cpp", "NativeRegisterContextLinux_arm.cpp", "NativeRegisterContextLinux_arm64.cpp", + "NativeRegisterContextLinux_arm64dbreg.cpp", "NativeRegisterContextLinux_loongarch64.cpp", "NativeRegisterContextLinux_ppc64le.cpp", "NativeRegisterContextLinux_riscv64.cpp", diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn index 566195e..4e63aa8 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn @@ -69,6 +69,7 @@ static_library("DWARF") { "SymbolFileDWARF.cpp", "SymbolFileDWARFDebugMap.cpp", "SymbolFileDWARFDwo.cpp", + "SymbolFileWasm.cpp", "UniqueDWARFASTType.cpp", ] } diff --git a/llvm/utils/gn/secondary/lldb/test/BUILD.gn b/llvm/utils/gn/secondary/lldb/test/BUILD.gn index 6dcce2d..586f9fd 100644 --- a/llvm/utils/gn/secondary/lldb/test/BUILD.gn +++ 
b/llvm/utils/gn/secondary/lldb/test/BUILD.gn @@ -118,6 +118,7 @@ write_lit_cfg("lit_shell_site_cfg") { "CLANG_RESOURCE_DIR=", "DEFAULT_SYSROOT=", "LIBCXX_LIBRARY_DIR=" + rebase_path("$root_build_dir/lib"), + "LLDB_BUILD_LLDBRPC=0", # FIXME: add lldb-rpc-gen target, enable "LLDB_ENABLE_LUA=0", # FIXME: gn arg, use in Config.h "LLDB_ENABLE_LZMA=0", # FIXME: gn arg, use in Config.h "LLDB_ENABLE_PYTHON=0", # FIXME: gn arg, use in Config.h diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index b8f3b4f..499ded9 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -296,6 +296,7 @@ write_cmake_config("llvm-config") { input = "llvm-config.h.cmake" output = "$target_gen_dir/llvm-config.h" values = [ + "LLVM_ENABLE_PROFCHECK=", "LLVM_BUILD_LLVM_DYLIB=", "LLVM_BUILD_SHARED_LIBS=", "LLVM_ENABLE_LLVM_C_EXPORT_ANNOTATIONS=", diff --git a/llvm/utils/gn/secondary/llvm/lib/MC/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/MC/BUILD.gn index 5c96bd8..eac2cd4 100644 --- a/llvm/utils/gn/secondary/llvm/lib/MC/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/MC/BUILD.gn @@ -57,13 +57,7 @@ static_library("MC") { "MCSPIRVStreamer.cpp", "MCSchedule.cpp", "MCSection.cpp", - "MCSectionCOFF.cpp", - "MCSectionDXContainer.cpp", - "MCSectionELF.cpp", - "MCSectionGOFF.cpp", "MCSectionMachO.cpp", - "MCSectionWasm.cpp", - "MCSectionXCOFF.cpp", "MCStreamer.cpp", "MCSubtargetInfo.cpp", "MCSymbol.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/ObjCARC/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/ObjCARC/BUILD.gn index e7b2084..d4ad915 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/ObjCARC/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/ObjCARC/BUILD.gn @@ -9,7 +9,6 @@ static_library("ObjCARC") { sources = [ "DependencyAnalysis.cpp", "ObjCARC.cpp", - "ObjCARCAPElim.cpp", "ObjCARCContract.cpp", 
"ObjCARCExpand.cpp", "ObjCARCOpts.cpp", diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn index 7ed0d3c..08cddc1 100644 --- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn @@ -64,6 +64,7 @@ write_lit_config("lit_site_cfg") { "LLVM_APPEND_VC_REV=0", "LLVM_ENABLE_FFI=0", "LLVM_ENABLE_HTTPLIB=0", + "LLVM_ENABLE_PROFCHECK=0", "LLVM_EXPERIMENTAL_KEY_INSTRUCTIONS=0", "LLVM_FORCE_VC_REVISION=", "LLVM_HAS_LOGF128=0", diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn index e1d740a..b32b55f 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn @@ -42,6 +42,7 @@ unittest("CodeGenTests") { "ScalableVectorMVTsTest.cpp", "SchedBoundary.cpp", "SelectionDAGAddressAnalysisTest.cpp", + "SelectionDAGNodeConstructionTest.cpp", "SelectionDAGPatternMatchTest.cpp", "TargetOptionsTest.cpp", "TestAsmPrinter.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn index 3aaec30..bcb8535 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn @@ -34,6 +34,7 @@ unittest("SupportTests") { "DJBTest.cpp", "DataExtractorTest.cpp", "DebugCounterTest.cpp", + "DebugLogTest.cpp", "DebugTest.cpp", "DivisionByConstantTest.cpp", "ELFAttributeParserTest.cpp", diff --git a/llvm/utils/lit/lit/formats/base.py b/llvm/utils/lit/lit/formats/base.py index 27f7c7e..db5c1c3 100644 --- a/llvm/utils/lit/lit/formats/base.py +++ b/llvm/utils/lit/lit/formats/base.py @@ -34,8 +34,7 @@ class FileBasedTest(TestFormat): if filename.startswith(".") or filename in localConfig.excludes: return - base, ext = os.path.splitext(filename) - if ext in localConfig.suffixes: + if any(filename.endswith(suffix) for suffix in 
localConfig.suffixes): yield lit.Test.Test(testSuite, path_in_suite, localConfig) def getTestsInDirectory(self, testSuite, path_in_suite, litConfig, localConfig): diff --git a/llvm/utils/lldbDataFormatters.py b/llvm/utils/lldbDataFormatters.py index c5cd627..7fbeabe6 100644 --- a/llvm/utils/lldbDataFormatters.py +++ b/llvm/utils/lldbDataFormatters.py @@ -94,6 +94,11 @@ def __lldb_init_module(debugger, internal_dict): f"-l {__name__}.ExpectedSynthetic " '-x "^llvm::Expected<.+>$"' ) + debugger.HandleCommand( + "type summary add -w llvm " + f"-F {__name__}.SmallBitVectorSummary " + "llvm::SmallBitVector" + ) # Pretty printer for llvm::SmallVector/llvm::SmallVectorImpl @@ -448,3 +453,28 @@ class ExpectedSynthetic: if idx == 0: return self.stored_value return lldb.SBValue() + + +def SmallBitVectorSummary(valobj, _): + underlyingValue = valobj.GetChildMemberWithName("X").unsigned + numBaseBits = valobj.target.addr_size * 8 + smallNumRawBits = numBaseBits - 1 + smallNumSizeBits = None + if numBaseBits == 32: + smallNumSizeBits = 5 + elif numBaseBits == 64: + smallNumSizeBits = 6 + else: + smallNumSizeBits = smallNumRawBits + smallNumDataBits = smallNumRawBits - smallNumSizeBits + + # If our underlying value is not small, print we can not dump large values. + isSmallMask = 1 + if underlyingValue & isSmallMask == 0: + return "<can not read large SmallBitVector>" + + smallRawBits = underlyingValue >> 1 + smallSize = smallRawBits >> smallNumDataBits + bits = smallRawBits & ((1 << (smallSize + 1)) - 1) + # format `bits` in binary (b), with 0 padding, of width `smallSize`, and right aligned (>) + return f"[{bits:0>{smallSize}b}]" diff --git a/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py b/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py new file mode 100644 index 0000000..c48503e --- /dev/null +++ b/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py @@ -0,0 +1,304 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +"""IR2Vec Triplet Generator + +Generates IR2Vec triplets by applying random optimization levels to LLVM IR files +and extracting triplets using llvm-ir2vec. Automatically generates preprocessed +files: entity2id.txt, relation2id.txt, and train2id.txt. + +Usage: + python generateTriplets.py <llvm_build_dir> <num_optimizations> <ll_file_list> <output_dir> +""" + +import argparse +import logging +import os +import random +import subprocess +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path +from typing import List, Set, Tuple + +# Configuration +OPT_LEVELS = ["O0", "O1", "O2", "O3", "Os", "Oz"] +DEFAULT_MAX_WORKERS = 100 + +logger = logging.getLogger(__name__) + + +# TODO: Change this to a dataclass with slots +# when Python 3.10+ is the minimum version +# https://docs.python.org/3/library/dataclasses.html#dataclasses.dataclass +class TripletResult: + """Result from processing a single LLVM IR file""" + + __slots__ = ["triplets", "max_relation"] + + def __init__(self, triplets: Set[str], max_relation: int): + self.triplets = triplets + self.max_relation = max_relation + + +class IR2VecTripletGenerator: + """Main class for generating IR2Vec triplets""" + + def __init__( + self, + llvm_build_dir: Path, + num_optimizations: int, + output_dir: Path, + max_workers: int = DEFAULT_MAX_WORKERS, + ): + self.llvm_build_dir = llvm_build_dir + self.num_optimizations = num_optimizations + self.output_dir = output_dir + self.max_workers = max_workers + + # Tool paths + self.opt_binary = os.path.join(llvm_build_dir, "bin", "opt") + self.ir2vec_binary = os.path.join(llvm_build_dir, "bin", "llvm-ir2vec") + + self._validate_setup() + + # Create output directory if it doesn't exist + self.output_dir.mkdir(parents=True, exist_ok=True) + + def _validate_setup(self): + """Validate that all required tools and paths exist""" + if not 
self.llvm_build_dir.exists(): + raise FileNotFoundError( + f"LLVM build directory not found: {self.llvm_build_dir}" + ) + + if not os.path.isfile(self.opt_binary) or not os.access( + self.opt_binary, os.X_OK + ): + raise FileNotFoundError( + f"opt binary not found or not executable: {self.opt_binary}" + ) + + if not os.path.isfile(self.ir2vec_binary) or not os.access( + self.ir2vec_binary, os.X_OK + ): + raise FileNotFoundError( + f"llvm-ir2vec binary not found or not executable: {self.ir2vec_binary}" + ) + + if not (1 <= self.num_optimizations <= len(OPT_LEVELS)): + raise ValueError( + f"Number of optimizations must be between 1-{len(OPT_LEVELS)}" + ) + + def _select_optimization_levels(self) -> List[str]: + """Select unique random optimization levels""" + return random.sample(OPT_LEVELS, self.num_optimizations) + + def _process_single_file(self, input_file: Path) -> TripletResult: + """Process a single LLVM IR file with multiple optimization levels""" + all_triplets = set() + max_relation = 1 + opt_levels = self._select_optimization_levels() + + for opt_level in opt_levels: + triplets, file_max_relation = self._run_pipeline(input_file, opt_level) + if triplets: + all_triplets.update(triplets) + max_relation = max(max_relation, file_max_relation) + logger.debug( + f"Generated {len(triplets)} triplets for {input_file} with {opt_level}" + ) + + return TripletResult(all_triplets, max_relation) + + def _run_pipeline(self, input_file: Path, opt_level: str) -> Tuple[Set[str], int]: + """Run opt | llvm-ir2vec pipeline using subprocess pipes.""" + try: + # Run opt first + opt_proc = subprocess.Popen( + [self.opt_binary, f"-{opt_level}", str(input_file), "-o", "-"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + # Run llvm-ir2vec with opt's output as input + ir2vec_proc = subprocess.Popen( + [self.ir2vec_binary, "--mode=triplets", "-", "-o", "-"], + stdin=opt_proc.stdout, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + 
opt_proc.stdout.close() + stdout, _ = ir2vec_proc.communicate() + opt_proc.wait() + + # Check if either process failed + if opt_proc.returncode != 0 or ir2vec_proc.returncode != 0: + return set(), 1 + + return self._parse_triplet_output(stdout) + except (subprocess.SubprocessError, OSError): + return set(), 1 + + def _parse_triplet_output(self, output: str) -> Tuple[Set[str], int]: + """Parse triplet output and extract max relation""" + if not output.strip(): + return set(), 1 + + lines = output.strip().split("\n") + max_relation = 1 + + # Extract max relation from metadata line + if lines and lines[0].startswith("MAX_RELATION="): + max_relation = int(lines[0].split("=")[1]) + lines = lines[1:] + + # Remove duplicate triplets by converting to a set + return set(lines), max_relation + + def generate_triplets(self, file_list: Path) -> None: + """Main method to generate triplets from a list of LLVM IR files""" + input_files = self._read_file_list(file_list) + logger.info( + f"Processing {len(input_files)} files with {self.num_optimizations} " + f"optimization levels using {self.max_workers} workers" + ) + + all_triplets = set() + global_max_relation = 1 + + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + future_to_file = { + executor.submit(self._process_single_file, file): file + for file in input_files + } + + for future in as_completed(future_to_file): + try: + result = future.result() + all_triplets.update(result.triplets) + global_max_relation = max(global_max_relation, result.max_relation) + except (subprocess.SubprocessError, OSError, ValueError) as e: + file_path = future_to_file[future] + logger.error(f"Error processing {file_path}: {e}") + + self._generate_output_files(all_triplets, global_max_relation) + logger.info("Processing completed successfully") + + def _read_file_list(self, file_list: Path) -> List[Path]: + """Read and validate the list of input files""" + input_files = [] + with open(file_list, "r") as f: + for line_num, line 
in enumerate(f, 1): + if line := line.strip(): + file_path = Path(line) + if file_path.exists(): + input_files.append(file_path) + else: + logger.warning(f"File not found (line {line_num}): {file_path}") + + if not input_files: + raise ValueError("No valid input files found") + return input_files + + def _generate_output_files(self, all_triplets: Set[str], max_relation: int) -> None: + """Generate the final output files""" + logger.info(f"Generating output files with {len(all_triplets)} unique triplets") + + # Write all output files -- train2id.txt, entity2id.txt, relation2id.txt + train2id_file = os.path.join(self.output_dir, "train2id.txt") + entity2id_file = os.path.join(self.output_dir, "entity2id.txt") + relation2id_file = os.path.join(self.output_dir, "relation2id.txt") + + with open(train2id_file, "w") as f: + f.write(f"{len(all_triplets)}\n") + f.writelines(f"{triplet}\n" for triplet in all_triplets) + + self._generate_entity2id(entity2id_file) + self._generate_relation2id(relation2id_file, max_relation) + + def _generate_entity2id(self, output_file: Path) -> None: + """Generate entity2id.txt using llvm-ir2vec""" + subprocess.run( + [str(self.ir2vec_binary), "--mode=entities", "-o", str(output_file)], + check=True, + capture_output=True, + ) + + def _generate_relation2id(self, output_file: Path, max_relation: int) -> None: + """Generate relation2id.txt from max relation""" + max_relation = max(max_relation, 1) # At least Type and Next relations + num_relations = max_relation + 1 + + with open(output_file, "w") as f: + f.write(f"{num_relations}\n") + f.write("Type\t0\n") + f.write("Next\t1\n") + f.writelines(f"Arg{i-2}\t{i}\n" for i in range(2, num_relations)) + + +def main(): + """Main entry point""" + parser = argparse.ArgumentParser( + description="Generate IR2Vec triplets from LLVM IR files", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + parser.add_argument( + "llvm_build_dir", type=Path, help="Path to LLVM build directory" + ) + 
parser.add_argument( + "num_optimizations", + type=int, + help="Number of optimization levels to apply (1-6)", + ) + parser.add_argument( + "ll_file_list", + type=Path, + help="File containing list of LLVM IR files to process", + ) + parser.add_argument( + "output_dir", type=Path, help="Output directory for generated files" + ) + parser.add_argument( + "-j", + "--max-workers", + type=int, + default=DEFAULT_MAX_WORKERS, + help=f"Maximum number of parallel workers (default: {DEFAULT_MAX_WORKERS})", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="Enable debug logging" + ) + parser.add_argument( + "-q", "--quiet", action="store_true", help="Suppress all output except errors" + ) + + args = parser.parse_args() + + # Configure logging + level = ( + logging.ERROR + if args.quiet + else (logging.DEBUG if args.verbose else logging.INFO) + ) + logging.basicConfig( + level=level, + format="[%(asctime)s] %(levelname)s: %(message)s", + datefmt="%H:%M:%S", + ) + + generator = IR2VecTripletGenerator( + args.llvm_build_dir, + args.num_optimizations, + args.output_dir, + args.max_workers, + ) + generator.generate_triplets(args.ll_file_list) + + +if __name__ == "__main__": + main() diff --git a/llvm/utils/release/github-upload-release.py b/llvm/utils/release/github-upload-release.py index 90c222d..5ed037ee 100755 --- a/llvm/utils/release/github-upload-release.py +++ b/llvm/utils/release/github-upload-release.py @@ -45,19 +45,39 @@ def create_release(repo, release, tag=None, name=None, message=None): # Note that these lines are not length limited because if we do so, GitHub # assumes that should be how it is laid out on the page. We want GitHub to # do the reflowing for us instead. + # + # Once all the automatic binary builds have completed, the HTML comments + # with UPPERCASE markers in them will be removed to reveal the download + # links later.
Other lines are surrounded in <!-- --> for release uploaders + # to manually uncomment when they upload that package. message = dedent( """\ -LLVM {release} Release +## LLVM {release} Release -## Package Types +<!-- AUTOMATIC_DOWNLOAD_LINKS_BEGIN +* [Linux x86_64](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-Linux-X64.tar.xz) ([signature](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-Linux-X64.tar.xz.jsonl)) +* [Linux Arm64](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-Linux-ARM64.tar.xz) ([signature](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-Linux-ARM64.tar.xz.jsonl)) +AUTOMATIC_DOWNLOAD_LINKS_END --> +<!-- * [Linux Armv7-a](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/clang+llvm-{release}-armv7a-linux-gnueabihf.tar.gz) ([signature](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/clang+llvm-{release}-armv7a-linux-gnueabihf.tar.gz.sig)) --> -Each platform has one binary release package. The file name starts with either `LLVM-` or `clang+llvm-` and ends with the platform's name. For example, `LLVM-{release}-Linux-ARM64.tar.xz` contains LLVM binaries for Arm64 Linux.
+<!-- AUTOMATIC_DOWNLOAD_LINKS_BEGIN +* [macOS Apple Silicon](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-macOS-ARM64.tar.xz) (ARM64) ([signature](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-macOS-ARM64.tar.xz.jsonl)) +* [macOS Intel](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-macOS-X64.tar.xz) (x86-64) ([signature](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-macOS-X64.tar.xz.jsonl)) +AUTOMATIC_DOWNLOAD_LINKS_END --> + +<!-- * Windows x64 (64-bit): [installer](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-win64.exe) ([signature](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-win64.exe.sig)), [archive](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/clang+llvm-{release}-x86_64-pc-windows-msvc.tar.xz) ([signature](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/clang+llvm-{release}-x86_64-pc-windows-msvc.tar.xz.sig)) --> +<!-- * Windows x86 (32-bit): [installer](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-win32.exe) ([signature](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-win32.exe.sig)) --> +<!-- * Windows on Arm (ARM64): [installer](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-woa64.exe) ([signature](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/LLVM-{release}-woa64.exe.sig)), [archive](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/clang+llvm-{release}-aarch64-pc-windows-msvc.tar.xz)
([signature](https://github.com/llvm/llvm-project/releases/download/llvmorg-{release}/clang+llvm-{release}-aarch64-pc-windows-msvc.tar.xz.sig)) --> -Except for Windows. Where `LLVM-*.exe` is an installer intended for using LLVM as a toolchain and `clang+llvm-` contains the contents of the installer, plus libraries and tools not normally used in a toolchain. You most likely want the `LLVM-` installer, unless you are developing software which itself uses LLVM, in which case choose `clang+llvm-`. +Download links will appear here once builds have completed. <!-- AUTOMATIC_DOWNLOAD_LINKS_PLACEHOLDER --> + +For any other variants of platform and architecture, check the full list of release packages at the bottom of this release page. If you do not find a release package for your platform, you may be able to find a community built package on the LLVM Discourse forum thread for this release. Remember that these are built by volunteers and may not always be available. If you rely on a platform or configuration that is not one of the defaults, we suggest you use the binaries that your platform provides, or build your own release packages. + +## Package Types -If you do not find a release package for your platform, you may be able to find a community built package on the LLVM Discourse forum thread for this release. Remember that these are built by volunteers and may not always be available. +Each platform has one binary release package. The file name starts with either `LLVM-` or `clang+llvm-` and ends with the platform's name. For example, `LLVM-{release}-Linux-ARM64.tar.xz` contains LLVM binaries for Arm64 Linux. -If you rely on a platform or configuration that is not one of the defaults, we suggest you use the binaries that your platform provides, or build your own release packages. +Except for Windows. 
Where `LLVM-*.exe` is an installer intended for using LLVM as a toolchain and the archive `clang+llvm-` contains the contents of the installer, plus libraries and tools not normally used in a toolchain. You most likely want the `LLVM-` installer, unless you are developing software which itself uses LLVM, in which case choose `clang+llvm-`. In addition, source archives are available: * `<sub-project>-{release}.src.tar.xz` are archives of the sources of specific sub-projects of `llvm-project` (except for `test-suite` which is an archive of the [LLVM Test Suite](https://github.com/llvm/llvm-test-suite)). @@ -95,9 +115,35 @@ def upload_files(repo, release, files): print("Done") +def uncomment_download_links(repo, release): + release = repo.get_release("llvmorg-{}".format(release)) + + new_message = [] + to_remove = [ + "AUTOMATIC_DOWNLOAD_LINKS_BEGIN", + "AUTOMATIC_DOWNLOAD_LINKS_END", + "AUTOMATIC_DOWNLOAD_LINKS_PLACEHOLDER", + ] + for line in release.body.splitlines(): + for comment in to_remove: + if comment in line: + break + else: + new_message.append(line) + + release.update_release( + name=release.title, + message="\n".join(new_message), + draft=release.draft, + prerelease=release.prerelease, + ) + + parser = argparse.ArgumentParser() parser.add_argument( - "command", type=str, choices=["create", "upload", "check-permissions"] + "command", + type=str, + choices=["create", "upload", "check-permissions", "uncomment_download_links"], ) # All args @@ -137,3 +183,5 @@ if args.command == "create": create_release(llvm_repo, args.release) if args.command == "upload": upload_files(llvm_repo, args.release, args.files) +if args.command == "uncomment_download_links": + uncomment_download_links(llvm_repo, args.release) diff --git a/llvm/utils/update_mir_test_checks.py b/llvm/utils/update_mir_test_checks.py index 8db46ad..c4ee052 100755 --- a/llvm/utils/update_mir_test_checks.py +++ b/llvm/utils/update_mir_test_checks.py @@ -35,9 +35,14 @@ from UpdateTestChecks import common 
VREG_RE = re.compile(r"(%[0-9]+)(?:\.[a-z0-9_]+)?(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?") MI_FLAGS_STR = ( r"(frame-setup |frame-destroy |nnan |ninf |nsz |arcp |contract |afn " - r"|reassoc |nuw |nsw |exact |nofpexcept |nomerge |disjoint )*" + r"|reassoc |nuw |nsw |exact |nofpexcept |nomerge |unpredictable " + r"|noconvergent |nneg |disjoint |nusw |samesign |inbounds )*" ) VREG_DEF_FLAGS_STR = r"(?:dead |undef )*" + +# Pattern to match the defined vregs and the opcode of an instruction that +# defines vregs. Opcodes starting with a lower-case 't' are allowed to match +# ARM's thumb instructions, like tADDi8 and t2ADDri. VREG_DEF_RE = re.compile( r"^ *(?P<vregs>{2}{0}(?:, {2}{0})*) = " r"{1}(?P<opcode>[A-Zt][A-Za-z0-9_]+)".format( |