diff options
Diffstat (limited to 'llvm/lib')
196 files changed, 7827 insertions, 1551 deletions
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp index f9bf092..6f19a68 100644 --- a/llvm/lib/Analysis/DXILResource.cpp +++ b/llvm/lib/Analysis/DXILResource.cpp @@ -255,6 +255,12 @@ static void formatTypeName(SmallString<64> &Dest, StringRef Name, if (!ContainedType) return; + SmallVector<uint64_t> ArrayDimensions; + while (ArrayType *AT = dyn_cast<ArrayType>(ContainedType)) { + ArrayDimensions.push_back(AT->getNumElements()); + ContainedType = AT->getElementType(); + } + StringRef ElementName; ElementType ET = toDXILElementType(ContainedType, IsSigned); if (ET != ElementType::Invalid) { @@ -271,6 +277,8 @@ static void formatTypeName(SmallString<64> &Dest, StringRef Name, DestStream << "<" << ElementName; if (const FixedVectorType *VTy = dyn_cast<FixedVectorType>(ContainedType)) DestStream << VTy->getNumElements(); + for (uint64_t Dim : ArrayDimensions) + DestStream << "[" << Dim << "]"; DestStream << ">"; } diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 853bd66..a572eef 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -1582,6 +1582,23 @@ static const SCEV *minusSCEVNoSignedOverflow(const SCEV *A, const SCEV *B, return nullptr; } +/// Returns the absolute value of \p A. In the context of dependence analysis, +/// we need an absolute value in a mathematical sense. If \p A is the signed +/// minimum value, we cannot represent it unless extending the original type. +/// Thus if we cannot prove that \p A is not the signed minimum value, returns +/// nullptr. 
+static const SCEV *absSCEVNoSignedOverflow(const SCEV *A, ScalarEvolution &SE) { + IntegerType *Ty = cast<IntegerType>(A->getType()); + if (!Ty) + return nullptr; + + const SCEV *SMin = + SE.getConstant(APInt::getSignedMinValue(Ty->getBitWidth())); + if (!SE.isKnownPredicate(CmpInst::ICMP_NE, A, SMin)) + return nullptr; + return SE.getAbsExpr(A, /*IsNSW=*/true); +} + /// Returns true iff \p Test is enabled. static bool isDependenceTestEnabled(DependenceTestType Test) { if (EnableDependenceTest == DependenceTestType::All) @@ -1669,21 +1686,25 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); // check that |Delta| < iteration count - if (const SCEV *UpperBound = - collectUpperBound(CurSrcLoop, Delta->getType())) { + bool IsDeltaLarge = [&] { + const SCEV *UpperBound = collectUpperBound(CurSrcLoop, Delta->getType()); + if (!UpperBound) + return false; + LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); - const SCEV *AbsDelta = - SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); - const SCEV *AbsCoeff = - SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); + const SCEV *AbsDelta = absSCEVNoSignedOverflow(Delta, *SE); + const SCEV *AbsCoeff = absSCEVNoSignedOverflow(Coeff, *SE); + if (!AbsDelta || !AbsCoeff) + return false; const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); - if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { - // Distance greater than trip count - no dependence - ++StrongSIVindependence; - ++StrongSIVsuccesses; - return true; - } + return isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product); + }(); + if (IsDeltaLarge) { + // Distance greater than trip count - no dependence + ++StrongSIVindependence; + ++StrongSIVsuccesses; + return true; } // Can we compute distance? 
@@ -2259,6 +2280,9 @@ bool DependenceInfo::weakZeroSrcSIVtest( const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); if (!ConstCoeff) return false; + + // Since ConstCoeff is constant, !isKnownNegative means it's non-negative. + // TODO: Bail out if it's a signed minimum value. const SCEV *AbsCoeff = SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; @@ -2369,6 +2393,9 @@ bool DependenceInfo::weakZeroDstSIVtest( const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); if (!ConstCoeff) return false; + + // Since ConstCoeff is constant, !isKnownNegative means it's non-negative. + // TODO: Bail out if it's a signed minimum value. const SCEV *AbsCoeff = SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 757f689..c4fee39 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -751,7 +751,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { if (CA.analyze().isSuccess()) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. - Cost -= std::max(0, CA.getThreshold() - CA.getCost()); + addCost(-std::max(0, CA.getThreshold() - CA.getCost())); } } else // Otherwise simply add the cost for merely making the call. @@ -1191,7 +1191,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { // If this function uses the coldcc calling convention, prefer not to inline // it. if (F.getCallingConv() == CallingConv::Cold) - Cost += InlineConstants::ColdccPenalty; + addCost(InlineConstants::ColdccPenalty); LLVM_DEBUG(dbgs() << " Initial cost: " << Cost << "\n"); @@ -2193,7 +2193,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { // the cost of inlining it drops dramatically. 
It may seem odd to update // Cost in updateThreshold, but the bonus depends on the logic in this method. if (isSoleCallToLocalFunction(Call, F)) { - Cost -= LastCallToStaticBonus; + addCost(-LastCallToStaticBonus); StaticBonusApplied = LastCallToStaticBonus; } } diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp index 050c327..424a7fe 100644 --- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -436,10 +436,9 @@ bool IndexedReference::delinearize(const LoopInfo &LI) { const SCEV *StepRec = AccessFnAR ? AccessFnAR->getStepRecurrence(SE) : nullptr; if (StepRec && SE.isKnownNegative(StepRec)) - AccessFn = SE.getAddRecExpr(AccessFnAR->getStart(), - SE.getNegativeSCEV(StepRec), - AccessFnAR->getLoop(), - AccessFnAR->getNoWrapFlags()); + AccessFn = SE.getAddRecExpr( + AccessFnAR->getStart(), SE.getNegativeSCEV(StepRec), + AccessFnAR->getLoop(), SCEV::NoWrapFlags::FlagAnyWrap); const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize); Subscripts.push_back(Div); Sizes.push_back(ElemSize); diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp index 1c5f08e..edca387 100644 --- a/llvm/lib/Analysis/MemoryLocation.cpp +++ b/llvm/lib/Analysis/MemoryLocation.cpp @@ -288,6 +288,34 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call, LocationSize::precise(DL.getTypeStoreSize( II->getArgOperand(1)->getType())), AATags); + case Intrinsic::matrix_column_major_load: + case Intrinsic::matrix_column_major_store: { + bool IsLoad = II->getIntrinsicID() == Intrinsic::matrix_column_major_load; + assert(ArgIdx == (IsLoad ? 0 : 1) && "Invalid argument index"); + + auto *Stride = dyn_cast<ConstantInt>(II->getArgOperand(IsLoad ? 1 : 2)); + uint64_t Rows = + cast<ConstantInt>(II->getArgOperand(IsLoad ? 3 : 4))->getZExtValue(); + uint64_t Cols = + cast<ConstantInt>(II->getArgOperand(IsLoad ? 
4 : 5))->getZExtValue(); + + // The stride is dynamic, so there's nothing we can say. + if (!Stride) + return MemoryLocation(Arg, LocationSize::afterPointer(), AATags); + + uint64_t ConstStride = Stride->getZExtValue(); + auto *VT = cast<VectorType>(IsLoad ? II->getType() + : II->getArgOperand(0)->getType()); + assert(Cols != 0 && "Matrix cannot have 0 columns"); + TypeSize Size = DL.getTypeAllocSize(VT->getScalarType()) * + (ConstStride * (Cols - 1) + Rows); + + // In the unstrided case, we have a precise size, ... + if (ConstStride == Rows) + return MemoryLocation(Arg, LocationSize::precise(Size), AATags); + // otherwise we merely obtain an upper bound. + return MemoryLocation(Arg, LocationSize::upperBound(Size), AATags); + } } assert( diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp index 92a5b6f..b09f4ed 100644 --- a/llvm/lib/Analysis/MemoryProfileInfo.cpp +++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp @@ -241,9 +241,13 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack, ColdBytes += TotalSize; // If we have the max cold context size from summary information and have // requested identification of contexts above a percentage of the max, see - // if this context qualifies. - if (MaxColdSize > 0 && MinPercentMaxColdSize < 100 && - TotalSize * 100 >= MaxColdSize * MinPercentMaxColdSize) + // if this context qualifies. We should assume this is large if we rebuilt + // the trie from existing metadata (i.e. to update after inlining), in + // which case we don't have a MaxSize from the profile - we assume any + // context size info in existence on the metadata should be propagated. 
+ if (BuiltFromExistingMetadata || + (MaxColdSize > 0 && MinPercentMaxColdSize < 100 && + TotalSize * 100 >= MaxColdSize * MinPercentMaxColdSize)) LargeColdContext = true; } // Only add the context size info as metadata if we need it in the thin diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 6f7dd79..7597f3a 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3768,13 +3768,11 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, return getOrCreateAddRecExpr(Operands, L, Flags); } -const SCEV * -ScalarEvolution::getGEPExpr(GEPOperator *GEP, - const SmallVectorImpl<const SCEV *> &IndexExprs) { +const SCEV *ScalarEvolution::getGEPExpr(GEPOperator *GEP, + ArrayRef<const SCEV *> IndexExprs) { const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand()); // getSCEV(Base)->getType() has the same address space as Base->getType() // because SCEV::getType() preserves the address space. 
- Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType()); GEPNoWrapFlags NW = GEP->getNoWrapFlags(); if (NW != GEPNoWrapFlags::none()) { // We'd like to propagate flags from the IR to the corresponding SCEV nodes, @@ -3787,13 +3785,20 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, NW = GEPNoWrapFlags::none(); } + return getGEPExpr(BaseExpr, IndexExprs, GEP->getSourceElementType(), NW); +} + +const SCEV *ScalarEvolution::getGEPExpr(const SCEV *BaseExpr, + ArrayRef<const SCEV *> IndexExprs, + Type *SrcElementTy, GEPNoWrapFlags NW) { SCEV::NoWrapFlags OffsetWrap = SCEV::FlagAnyWrap; if (NW.hasNoUnsignedSignedWrap()) OffsetWrap = setFlags(OffsetWrap, SCEV::FlagNSW); if (NW.hasNoUnsignedWrap()) OffsetWrap = setFlags(OffsetWrap, SCEV::FlagNUW); - Type *CurTy = GEP->getType(); + Type *CurTy = BaseExpr->getType(); + Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType()); bool FirstIter = true; SmallVector<const SCEV *, 4> Offsets; for (const SCEV *IndexExpr : IndexExprs) { @@ -3812,7 +3817,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, if (FirstIter) { assert(isa<PointerType>(CurTy) && "The first index of a GEP indexes a pointer"); - CurTy = GEP->getSourceElementType(); + CurTy = SrcElementTy; FirstIter = false; } else { CurTy = GetElementPtrInst::getTypeAtIndex(CurTy, (uint64_t)0); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index bf62623..c47a1c1 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1001,13 +1001,25 @@ InstructionCost TargetTransformInfo::getShuffleCost( TargetTransformInfo::PartialReductionExtendKind TargetTransformInfo::getPartialReductionExtendKind(Instruction *I) { - if (isa<SExtInst>(I)) - return PR_SignExtend; - if (isa<ZExtInst>(I)) - return PR_ZeroExtend; + if (auto *Cast = dyn_cast<CastInst>(I)) + return getPartialReductionExtendKind(Cast->getOpcode()); return PR_None; } +TargetTransformInfo::PartialReductionExtendKind 
+TargetTransformInfo::getPartialReductionExtendKind( + Instruction::CastOps CastOpc) { + switch (CastOpc) { + case Instruction::CastOps::ZExt: + return PR_ZeroExtend; + case Instruction::CastOps::SExt: + return PR_SignExtend; + default: + return PR_None; + } + llvm_unreachable("Unhandled cast opcode"); +} + TTI::CastContextHint TargetTransformInfo::getCastContextHint(const Instruction *I) { if (!I) diff --git a/llvm/lib/Analysis/models/x86SeedEmbeddingVocab100D.json b/llvm/lib/Analysis/models/x86SeedEmbeddingVocab100D.json index 0afe5c7..f026b0d 100644 --- a/llvm/lib/Analysis/models/x86SeedEmbeddingVocab100D.json +++ b/llvm/lib/Analysis/models/x86SeedEmbeddingVocab100D.json @@ -1,5 +1,5 @@ { - "entities" : { + "Opcodes" : { "ABS_Fp":[0.07323841750621796, -0.006006906274706125, 0.09751169383525848, -0.011089739389717579, 0.06642112135887146, -0.015824640169739723, -0.021592319011688232, -0.0035401300992816687, 0.06047678738832474, -0.007392085622996092, 0.07134906202554703, -0.019624482840299606, -0.10975595563650131, -0.007685789838433266, 0.07451746612787247, 0.06384266912937164, -0.08230067789554596, 0.050922468304634094, 0.013724055141210556, 0.015687907114624977, -0.018451329320669174, 0.046987198293209076, -0.037734340876340866, -0.07235030829906464, 0.10218106210231781, 0.08037368208169937, -0.029537858441472054, -0.047520823776721954, -0.022125739604234695, -0.03125226870179176, -0.02882847562432289, 0.013811410404741764, 0.0023568253964185715, 0.017958490177989006, -0.05359291657805443, -0.03606243059039116, 0.07840022444725037, -0.016711654141545296, -0.038644544780254364, 0.05886651948094368, -0.011418955400586128, -0.04882095381617546, 0.04027162492275238, 0.001088760793209076, 0.03045983798801899, -0.10998888313770294, -0.0097441291436553, 0.015445191413164139, 0.030951637774705887, -0.06309321522712708, -0.019475746899843216, -0.029662512242794037, 0.05312168970704079, 0.05355998873710632, 0.05060160160064697, -0.053278811275959015, 
-0.01803833432495594, 0.010853713378310204, -0.053911495953798294, 0.06630647927522659, -0.08671313524246216, 0.0699775293469429, -0.08346731215715408, -0.045348167419433594, 0.06779918074607849, 0.008865933865308762, 0.05460203066468239, 0.007126103155314922, 0.0012282058596611023, 0.06817980855703354, 0.0216530654579401, 0.03552381321787834, 0.015414077788591385, -0.06002715229988098, 0.05233345925807953, 0.0782286673784256, 0.04220856353640556, -0.005762201733887196, 0.004772072657942772, 0.004578332882374525, 0.002619141712784767, 0.024511393159627914, -0.10089710354804993, 0.018322769552469254, 0.020811809226870537, -0.03358744457364082, -0.06896928697824478, -0.007399350870400667, -0.044467780739068985, -0.08094192296266556, -0.09795571863651276, 0.08391229063272476, -0.04749457910656929, 0.0029586481396108866, -5.354872337193228e-05, 0.005788655485957861, 0.015252145007252693, 0.06928747892379761, 0.041780371218919754, 0.016391364857554436], "ADC":[-0.07533542811870575, -0.01729339174926281, 0.04298720881342888, 0.015697332099080086, -0.04403507336974144, -0.059322185814380646, -0.050977922976017, 0.027526788413524628, -0.07009710371494293, -0.025621667504310608, 0.0352291613817215, -0.011538374237716198, 0.03682859241962433, -0.09788215160369873, -0.07216927409172058, -0.03659192472696304, 0.05676230415701866, -0.06369645893573761, -0.04756825789809227, 0.005865555722266436, 0.022270306944847107, -0.042112063616514206, 0.07008901983499527, 0.07748222351074219, -0.1020870953798294, -0.008511601015925407, -0.05725255608558655, -0.07881367206573486, 0.05627593398094177, -0.0005361076910048723, 0.03351512551307678, 0.04348289221525192, -0.08322969079017639, -0.02161242999136448, -0.07805898040533066, 0.04819482937455177, -0.061123576015233994, -0.010114834643900394, -0.04676959663629532, -0.008176938630640507, 0.010575453750789165, -0.04312445595860481, 0.00376943894661963, -0.0691257119178772, 0.03553615137934685, 0.10397598147392273, 0.009375158697366714, 
0.001147320494055748, 0.026351911947131157, -0.0194610096514225, -0.05202522128820419, 0.014047946780920029, -0.040036872029304504, 0.06963572651147842, 0.04827437922358513, -0.06908547878265381, 0.024857567623257637, -0.03304143249988556, 0.02291242778301239, 0.07687342166900635, -0.05110599845647812, -0.00873416755348444, 0.026205750182271004, 0.045064594596624374, -0.03565925359725952, 0.09580051153898239, -0.02518773265182972, 0.047807395458221436, -0.03548192232847214, 0.08286304026842117, -0.053511787205934525, 0.02892065793275833, -0.0495525486767292, 0.02590095065534115, -0.006982128601521254, 0.006042638327926397, -0.07269058376550674, 0.02401554025709629, -0.05660006031394005, -0.026029467582702637, 0.05318204686045647, 0.06714116781949997, -0.0023821850772947073, 0.05028798058629036, -0.005811943672597408, -0.003296421840786934, -0.005409242119640112, -0.10150349885225296, -0.06406981498003006, 0.02553202211856842, -0.002790689468383789, 0.0663856491446495, 0.09109167754650116, -0.04678672179579735, 0.022019781172275543, 0.007821275852620602, 0.022490357980132103, -0.058503177016973495, 0.08841150254011154, -0.00892670825123787], "ADD":[-0.037626221776008606, 0.006784931290894747, 0.10051396489143372, -0.0014993306249380112, -0.0323498398065567, -0.03148593008518219, -0.014100957661867142, -0.020252650603652, 0.014126972295343876, -0.1295478343963623, 0.08520576357841492, -0.02513248659670353, 0.03539956361055374, -0.07019674777984619, -0.019069846719503403, 0.016678515821695328, -0.009174983017146587, -0.019034702330827713, -0.024083402007818222, -0.07829779386520386, -0.007908892817795277, -0.07924024760723114, -0.034599609673023224, 0.05271153524518013, 0.0016642026603221893, -0.03938138112425804, 0.0019624519627541304, 0.03562740981578827, 0.07340876758098602, 0.09457183629274368, -0.06507840752601624, 0.00246993126347661, -0.004548616707324982, 0.058226197957992554, -0.021043049171566963, -0.0599520243704319, -0.03138553351163864, 
0.03265950828790665, 0.004963710438460112, -0.003248866181820631, -0.04021746292710304, 0.038208190351724625, -0.02256007120013237, 0.10770396143198013, 0.013757425360381603, 0.040707558393478394, -0.00694271270185709, -0.012331271544098854, 0.004992029629647732, -0.032236646860837936, 0.01055158581584692, 0.04604483023285866, 0.09973260760307312, 0.07322807610034943, 0.06853726506233215, 0.004230210557579994, -0.04007832333445549, 0.16341225802898407, -0.01683313027024269, -0.01998194307088852, -0.035919081419706345, -0.055582448840141296, 0.008072910830378532, -0.0054771858267486095, -0.013343624770641327, 0.014230597764253616, -0.06542462855577469, 0.015897123143076897, -0.06011642515659332, 0.07983837276697159, 0.026512078940868378, 0.014883842319250107, -0.015171286650002003, 4.1508101276122034e-05, -0.048078570514917374, -0.052594274282455444, -0.07897629588842392, -0.01334046758711338, -0.06180298700928688, 0.022423526272177696, 0.07393807917833328, 0.022332284599542618, 0.04279463365674019, 0.04075624793767929, 0.007524204906076193, -0.024405587464571, 0.0011822516098618507, -0.0019135301699861884, 0.10789427906274796, -0.040499038994312286, 0.011574117466807365, 0.048836030066013336, 0.0380941741168499, -0.047072283923625946, -0.01285380870103836, -0.038019485771656036, -0.06277137994766235, -0.0034404860343784094, -0.031123748049139977, 0.04279843345284462], @@ -47,7 +47,6 @@ "CVTSS":[-0.06638028472661972, -0.011326023377478123, 0.008208844810724258, 0.007368308026343584, 0.009791173972189426, -0.03396046161651611, 0.02250068075954914, -0.057750262320041656, -0.04949551820755005, 0.02559898979961872, -0.025012727826833725, -0.05923935025930405, 0.005058884620666504, 0.008716589771211147, -0.017511164769530296, -0.07095059007406235, -0.06573225557804108, -0.028140492737293243, 0.11092227697372437, 0.02664722129702568, -0.01997300609946251, 0.0798712745308876, -0.022800235077738762, 0.09157945215702057, 0.025709187611937523, -0.09037603437900543, 
-0.07092109322547913, -0.04094154015183449, -0.025702493265271187, 0.015247789211571217, 0.06089004501700401, 0.051023274660110474, -0.04670926183462143, 0.04763137549161911, -0.035940639674663544, 0.002320673782378435, -0.005764417815953493, -0.07975194603204727, -0.0038822791539132595, 0.06728507578372955, 0.020742014050483704, 0.08809743821620941, -0.061493389308452606, -0.0485445000231266, -0.022268671542406082, 0.08475345373153687, -0.0030403153505176306, -0.05737586319446564, -0.07930854707956314, -0.01657176949083805, 0.04658877104520798, 0.005716703832149506, -0.04288295656442642, -0.08686209470033646, -0.07359853386878967, 0.02947128191590309, -0.03684910386800766, -0.03841136023402214, 0.01288131158798933, -0.04918907582759857, -0.05579863488674164, 0.06267702579498291, -0.0034505922812968493, 0.034628838300704956, 0.04280426353216171, 0.042202845215797424, 0.012274117209017277, 0.025021208450198174, -0.07867497205734253, 0.03826712444424629, 0.017088277265429497, 0.037250861525535583, -0.016143174842000008, -0.06754780560731888, -0.013957766816020012, 0.1060054823756218, 0.014829001389443874, 0.06808885931968689, 0.022929415106773376, -0.10870063304901123, -0.002258410444483161, 0.009293666109442711, 0.08529872447252274, -0.018672339618206024, -0.06721168756484985, 0.04180533438920975, -0.0031767592299729586, -0.023869113996624947, -0.00011912015179404989, -0.034519728273153305, 0.0022619885858148336, -0.00573525857180357, -0.033912476152181625, 0.059763263911008835, -0.048703599721193314, -0.07433722168207169, 0.04105979949235916, 0.0022583131212741137, 0.03093089908361435, -0.05187990516424179], "CVTTSD":[-0.08537309616804123, 0.0010597433429211378, 0.07481679320335388, 0.05997887998819351, -0.0376993790268898, 0.10309506952762604, 0.07795511186122894, 0.0833413377404213, 0.056095756590366364, 0.05851535126566887, -0.057075001299381256, 0.020756129175424576, -0.08901876956224442, 0.02559811621904373, -0.016971183940768242, -0.04282280057668686, 
-0.005386374890804291, -0.06672719866037369, -0.09664622694253922, 0.06042492762207985, -0.042353514581918716, 0.06194235011935234, -0.025712836533784866, -0.029526079073548317, 0.044016264379024506, 0.036507125943899155, -0.038406822830438614, 0.006118632387369871, -0.0495009683072567, -0.07487531006336212, -0.07304015755653381, 0.042621925473213196, -0.06314127147197723, 0.03934277594089508, -0.09373295307159424, -0.05887934938073158, 0.010626542381942272, -0.050934500992298126, -0.037448156625032425, 0.01178495679050684, -0.07045318186283112, 0.10210251808166504, -0.07279546558856964, 0.04947654530405998, -0.039519909769296646, 0.07030976563692093, -0.011039734818041325, 0.01187387015670538, -0.0840335488319397, -0.005615191534161568, -0.06869980692863464, -0.012282256036996841, -0.013054385781288147, -0.0711965560913086, 0.015505223535001278, 0.0693473145365715, 0.012862266041338444, -0.04747828096151352, 0.023439936339855194, 0.03891129791736603, -0.04998844489455223, -0.04673001170158386, 0.02121424488723278, 0.0501207634806633, 0.07420068979263306, -0.014888633042573929, 0.007586659397929907, 0.01340668834745884, -0.09216003119945526, 0.09335170686244965, 0.023272672668099403, 0.030810026451945305, 0.05792044475674629, -0.020374637097120285, -0.02717672660946846, 0.028085753321647644, 0.08691198378801346, 0.061656054109334946, -0.07689087092876434, 0.0407567173242569, 0.010403914377093315, -0.03389676660299301, 0.07075867801904678, 0.002534526167437434, -0.026066122576594353, 0.005012217443436384, 0.08335569500923157, -0.02732011303305626, 0.03854125738143921, 0.03336648270487785, -0.10646265000104904, -0.003997548017650843, 0.09871185570955276, 0.0275016650557518, 0.015653448179364204, 0.07066125422716141, 0.05811227858066559, 0.046357106417417526, 0.047027964144945145, 0.07407277077436447], "CVTTSS":[-0.07762601226568222, 0.051891617476940155, 0.02840222790837288, 0.012996217235922813, -0.04709569737315178, -0.011790127493441105, 0.07787185907363892, 
0.07411551475524902, 0.04010153189301491, 0.000911108567379415, -0.09610971063375473, 0.042953960597515106, 0.01613607630133629, -0.07504888623952866, -0.04967263713479042, 0.06148393824696541, -0.018901845440268517, 0.08033818751573563, -0.06893469393253326, -0.036083199083805084, 0.08206851035356522, 0.08462843298912048, 0.06728347390890121, -0.03210798278450966, -0.019102206453680992, 0.0723310112953186, 0.009836986660957336, -0.057902153581380844, 0.007954364642500877, -0.015247606672346592, 0.08317636698484421, -0.030078981071710587, -0.003329804167151451, -0.00047014118172228336, -0.02859017252922058, -0.07635723054409027, -0.008230162784457207, 0.03107159025967121, -0.009525406174361706, 0.06515175849199295, -0.06525594741106033, -0.028639627620577812, -0.0781184732913971, 0.009911812841892242, 0.011008340865373611, -0.04294031485915184, -0.04256690293550491, -1.394751961925067e-05, -0.029347950592637062, -0.031849224120378494, 0.012988862581551075, -0.0009693846222944558, -0.019299298524856567, 0.0045416890643537045, -0.04690401256084442, -0.04800841212272644, 0.0020325451623648405, -0.02004505693912506, 0.04130777344107628, -0.033602941781282425, -0.06956057250499725, -0.008079515770077705, 0.0033002288546413183, 0.03853915259242058, 0.08760882169008255, -0.04805464297533035, 0.02319355681538582, 0.018974801525473595, -0.08521144837141037, -0.05224936082959175, -0.023577861487865448, 0.01627342589199543, 0.024244949221611023, 0.09439363330602646, -0.007235093507915735, 0.055853620171546936, -0.00885567907243967, 0.02217228338122368, 0.05414341762661934, -0.0278383269906044, -0.000764147553127259, 0.045272815972566605, -0.009049531072378159, 0.05590446665883064, -0.05074811726808548, -0.06311893463134766, -0.026139337569475174, 0.01067473366856575, -0.043730076402425766, -0.07134802639484406, -0.11087869852781296, 0.05522335693240166, -0.07894640415906906, -0.06710508465766907, -0.022497203201055527, 0.0777427926659584, -0.07944057136774063, 
0.05494234338402748, -0.04788406938314438, -0.032921578735113144], - "ConstantPoolIndex":[0.041396364569664, -0.032536957412958145, -0.01450332161039114, -0.006678386591374874, 0.058945223689079285, 0.02544882893562317, -0.03047209233045578, -0.07739393413066864, -0.09328317642211914, -0.01668739691376686, -0.024649402126669884, -0.0379607230424881, -0.11910244077444077, -0.020992999896407127, -0.007654233835637569, -0.005232746247202158, -0.05641235038638115, -0.030478237196803093, -0.11095637828111649, -0.029757868498563766, 0.007831704802811146, -0.06478779017925262, -0.029330771416425705, -0.016729608178138733, 0.016851121559739113, -0.08636923134326935, 0.09819734841585159, -0.06862954050302505, -0.054081980139017105, -0.11573795974254608, 0.025045182555913925, -0.045820001512765884, -0.03937136381864548, -0.0006095073185861111, 0.010480350814759731, 0.04263518005609512, -0.07309181243181229, 0.030367357656359673, 0.05174611508846283, -0.07616177201271057, 0.08458246290683746, -0.05704038590192795, -0.08539492636919022, -0.027642514556646347, -0.01617196388542652, 0.025178344920277596, 0.009598441421985626, -0.02391812391579151, -0.007018273696303368, 0.08220435678958893, 0.019317878410220146, -0.07800780981779099, 0.008812256157398224, -0.08796992152929306, -0.018406951799988747, 0.06285018473863602, 0.0247958917170763, -0.010797450318932533, 0.042904313653707504, 0.04307369515299797, 0.03591239079833031, 0.0318138487637043, -0.052741825580596924, -0.05960077419877052, 0.05289359390735626, -0.07335714250802994, -0.07966916263103485, 0.06509458273649216, -0.014078558422625065, 0.05966315418481827, -0.10191051661968231, 0.038503143936395645, 0.08414285629987717, -0.09167703986167908, -0.03125883638858795, 0.00029595239902846515, -0.05052953213453293, 0.06109768897294998, 0.027757229283452034, 0.07064288854598999, 0.025423981249332428, 0.04430470988154411, 0.006646708585321903, 0.011614424176514149, -0.058028463274240494, -0.026873555034399033, 
-0.045714568346738815, -0.009242760017514229, -0.08255617320537567, 0.03060135245323181, -0.019932182505726814, -0.07189206779003143, 0.01935136877000332, 0.05297813192009926, 0.004497232846915722, -0.08383949100971222, -0.0008196682319976389, 0.03524069860577583, 0.023135961964726448, 0.00863903108984232], "DEC":[0.0634445771574974, -0.06605149805545807, 0.03212125599384308, 0.030006375163793564, -0.08837386220693588, -0.016591178253293037, -0.03157195448875427, 0.005282422062009573, 0.04301748052239418, -0.035375431180000305, -0.050481121987104416, -0.10733921080827713, -0.03802337497472763, -0.0745977833867073, -0.03943190351128578, -0.014895747415721416, 0.004689200781285763, -0.05872263386845589, -0.02043316885828972, 0.017881838604807854, -0.02151746302843094, 0.049130357801914215, -0.0980888232588768, -0.0012140831677243114, -0.03892286866903305, -0.050167523324489594, -0.06817777454853058, 0.011282221414148808, 0.0848090872168541, -0.04859968274831772, -0.005405630450695753, 0.09327276051044464, -0.031913015991449356, -0.07784294337034225, -0.039762917906045914, -0.0004000961489509791, -0.03763844072818756, -0.024915525689721107, 0.04509890824556351, 0.05546657368540764, -0.055939678102731705, -0.0467451736330986, -0.030023904517292976, -0.010519847273826599, 0.009574057534337044, 0.023444844409823418, 0.007250144146382809, 0.060414351522922516, -0.0011268716771155596, -0.10112253576517105, -0.068567655980587, -0.044332459568977356, 0.0022569731809198856, -0.012019195593893528, 0.0016708170296624303, 0.01029527559876442, -0.024694599211215973, -0.0428738109767437, 0.053816765546798706, 0.09999147802591324, 0.06608963757753372, 0.014324366115033627, 0.022997796535491943, -0.012565241195261478, -0.008212191984057426, -0.012308428063988686, -0.09830988198518753, -0.04177428036928177, 0.03759279474616051, 0.06749766319990158, -0.08330990374088287, -0.06375840306282043, 0.0471678152680397, 0.06524914503097534, 0.09668447077274323, 0.07395336031913757, 
-0.06081546097993851, 0.0322561152279377, -0.05461571738123894, 0.022349894046783447, 0.0981096625328064, 0.019211066886782646, 0.10566835105419159, 0.004508140496909618, 0.030159158632159233, 0.1076640635728836, -0.004145997576415539, 0.08043811470270157, 0.030684711411595345, 0.07909402251243591, -0.015952520072460175, 0.027102122083306313, 0.017120881006121635, 0.0860346332192421, 0.06145261228084564, -0.01827210932970047, 0.027506740763783455, 0.08201386034488678, -0.09402544051408768, -0.07927247136831284], "DIV":[0.08121486008167267, -0.06398852169513702, -0.007856910116970539, 0.09644383192062378, 0.0013691268395632505, 0.03523438796401024, -0.04342259466648102, -0.011761687695980072, 0.021194210276007652, -0.0386938601732254, -0.004948849324136972, -0.08348845690488815, 0.005121953319758177, -0.06682730466127396, -0.004115825518965721, 0.015023703686892986, 0.042783256620168686, 0.08872916549444199, -0.03392689675092697, -0.014770613051950932, 0.001988545060157776, -0.05145770683884621, -0.029310323297977448, 0.06324473023414612, -0.08066411316394806, 0.006997138261795044, 0.004352204035967588, -0.060964930802583694, 0.02948148362338543, 0.052747759968042374, -0.05635778605937958, -0.014655586332082748, 0.015838103368878365, -0.04539657384157181, 0.031915292143821716, 0.05234432592988014, -0.012030252255499363, 0.06431112438440323, -0.027869969606399536, -0.006431832443922758, 0.025956276804208755, 0.047651831060647964, -0.01758543774485588, 0.07249220460653305, -0.049627624452114105, -0.007435495033860207, 0.0015833197394385934, 2.190603845519945e-05, 0.03457536920905113, 0.03895196691155434, -0.037442032247781754, 0.003120564157143235, -0.0690622553229332, -0.04405339062213898, 0.016464274376630783, -0.05068953335285187, 0.009520933963358402, 0.05033525079488754, 0.030095860362052917, 0.08773164451122284, -0.03623930364847183, -0.0076989103108644485, 0.0133424773812294, 0.025229837745428085, 0.018198521807789803, 0.011319941841065884, 
-0.005582685582339764, -0.03598775342106819, -0.0565820187330246, 0.08609189838171005, 0.035601116716861725, -0.007436969317495823, -0.018040914088487625, -0.04825054481625557, -0.014956142753362656, 0.03343576192855835, -0.0739198625087738, 0.038971979171037674, -0.03691745549440384, -0.0371851809322834, 0.08137080073356628, 0.03924981504678726, -0.06499960273504257, 0.047913506627082825, -0.0464070662856102, 0.04404731094837189, -0.03972303494811058, 0.03341617435216904, 0.05367732420563698, -0.04457789286971092, -0.07455608248710632, 0.007865827530622482, 0.04562194645404816, -0.03552774339914322, -0.007738951593637466, 0.09388759732246399, -0.015701837837696075, 0.033921483904123306, -0.017276542261242867, 0.04943705350160599], "DIVPDrm":[0.04179735854268074, 0.008989601396024227, 0.0027430830523371696, 0.06804384291172028, -0.06657993793487549, 0.033647675067186356, -0.03707171231508255, 0.08443991839885712, -0.054565757513046265, 0.0765392854809761, -0.08189049363136292, 0.02573087066411972, 0.018917549401521683, 0.079402856528759, -0.011117411777377129, 0.06308865547180176, -0.045432765036821365, -0.05054701492190361, -0.009618235751986504, -0.0594516322016716, 0.07967120409011841, 0.08030137419700623, -0.0768255814909935, -0.061036787927150726, 0.004279104992747307, -0.09737113863229752, 0.07295801490545273, -0.027599459514021873, 0.0045133912935853004, -0.048141367733478546, 0.0003157609316986054, -0.014835191890597343, 0.01462356187403202, -0.03225003555417061, 0.06723359227180481, 0.05244021862745285, 0.07099424302577972, -0.09206876158714294, 0.06154841184616089, -0.022400988265872, 0.034042902290821075, 0.002528816694393754, -0.04578591138124466, -0.023195132613182068, -0.07696253061294556, -0.03475971147418022, 0.03545870631933212, -0.021839862689375877, 0.0036371496971696615, 0.07372148334980011, -0.0596211701631546, -0.06768393516540527, -0.032637521624565125, 0.008432515896856785, 0.007569535635411739, -0.0034237385261803865, 0.05811845883727074, 
0.013580343686044216, -0.03924565017223358, -0.025963587686419487, 0.03800642117857933, -0.04651957005262375, -0.033428385853767395, -0.053251899778842926, -0.04647624120116234, -0.034290049225091934, 0.003906013211235404, -0.05534028634428978, 0.04434804245829582, -0.08216925710439682, -0.011801591143012047, -0.006801240611821413, -0.07483590394258499, -0.06332433968782425, -0.005107037723064423, -0.008274846710264683, -0.07277056574821472, 0.03865613043308258, -0.0472225658595562, 0.009775533340871334, 0.055412523448467255, -0.014846398495137691, -0.008565607480704784, -0.018367087468504906, 0.038180120289325714, 0.06085506081581116, -0.02658388763666153, 0.006586031056940556, 0.0761575847864151, -0.007659312803298235, -0.10445686429738998, 0.01846102997660637, 0.02885548584163189, 0.0437043160200119, -0.012576445005834103, 0.04055696353316307, 0.002144219819456339, -0.08052077144384384, 0.03422001749277115, 0.03888843208551407], @@ -62,12 +61,9 @@ "DIV_Fp":[0.0013771128142252564, -0.03939857333898544, 0.06826473772525787, -0.055852942168712616, 0.021110225468873978, -0.07429434359073639, -0.01439732313156128, 0.047745198011398315, 0.03544871136546135, -0.006474921014159918, -0.05228240415453911, 0.00804696511477232, 0.0025021089240908623, 0.049810487776994705, -0.009595588780939579, 0.0507207065820694, -0.040155258029699326, 0.013851179741322994, -0.09630413353443146, -0.012529753148555756, 0.08176414668560028, 0.05994131416082382, 0.0013053410220891237, -0.035347871482372284, -0.06649265438318253, 0.07997933030128479, -0.042037565261125565, -0.06072461977601051, 0.09246786683797836, -0.0072363922372460365, 0.01850724034011364, 0.03905143961310387, -0.07601091265678406, -0.04824458062648773, -0.014410853385925293, -0.06455439329147339, -0.0593516007065773, -0.047922395169734955, -0.07904111593961716, -0.05896637961268425, -0.05629009008407593, -0.08674604445695877, 0.017179397866129875, -0.0020149857737123966, 0.02413070574402809, 0.024688012897968292, 
0.027266085147857666, -0.015890855342149734, -0.00813567265868187, 0.024672919884324074, -0.020992467179894447, 0.019298823550343513, 0.022587062790989876, 0.06570186465978622, 0.061541132628917694, -0.07291612029075623, -0.010421186685562134, 0.032753147184848785, -0.06230449676513672, 0.040921296924352646, 0.05855383351445198, -0.035908423364162445, 0.05353318154811859, -0.013773049227893353, 0.0073576089926064014, 0.016397720202803612, 0.03753839433193207, 0.04765179380774498, -0.041083212941884995, -0.013994180597364902, -0.015261827036738396, 0.0982649177312851, 0.05605688691139221, -0.041869863867759705, -0.017181048169732094, 0.03721241280436516, -0.005489564035087824, 0.026647603139281273, -0.07785916328430176, 0.0476430244743824, -0.006558667402714491, 0.06363014876842499, -0.05705825239419937, -0.048359137028455734, -0.09657922387123108, -0.020021332427859306, 0.05151694640517235, 0.0028305412270128727, -0.012787899002432823, -0.09800048917531967, 0.01322718895971775, 0.08181536942720413, -0.04321233555674553, -0.0016350646037608385, -0.03537006303668022, 0.041411954909563065, 0.028577959164977074, 0.01855066418647766, 0.01671769842505455, -0.04467424377799034], "DIV_FpI":[-0.010708109475672245, -0.0732470378279686, -0.033443547785282135, -0.06361733376979828, 0.017653197050094604, -0.030770231038331985, -0.0766882598400116, 0.08713997155427933, -0.0696694403886795, 0.0565333366394043, 0.0079630296677351, 0.009157304652035236, 0.07795052230358124, 0.00863052811473608, 0.009487103670835495, -0.021366223692893982, -0.08859013020992279, -0.052845098078250885, 0.07517100870609283, 0.030445149168372154, -0.031006425619125366, -0.011518558487296104, 0.031634584069252014, 0.006774903275072575, -0.0008412582101300359, 0.05720775946974754, -0.03664165362715721, 0.04671872407197952, -0.04702712222933769, 0.08346223086118698, -0.02042539417743683, 0.005731453187763691, -0.02509506233036518, 0.04370206221938133, -0.06398718804121017, 0.052075039595365524, 
0.05920809134840965, 0.0037172543816268444, 0.07034561783075333, -0.018100138753652573, 0.002755390014499426, -0.07121799886226654, 0.03879084065556526, 0.013516174629330635, -0.02845778502523899, 0.019500035792589188, 0.014439111575484276, 0.06561631709337234, -0.10264755040407181, -0.016511712223291397, -0.018063146620988846, 0.08819841593503952, -0.0031949833501130342, -0.07884415239095688, -0.10739012062549591, 0.007700629997998476, 0.049550142139196396, 0.0866587832570076, 0.054501283913850784, 0.10046342760324478, 0.01546743419021368, -0.05334487929940224, -0.02652003802359104, -0.009483176283538342, 0.011785115115344524, 0.04965313896536827, -0.030048802495002747, -0.043639082461595535, -0.004809096921235323, -0.0515226274728775, 0.08381897956132889, 0.003956930246204138, 0.03591177612543106, 0.04015829414129257, -0.03484338894486427, 0.04027436673641205, 0.09792015701532364, 0.013287014327943325, 0.09490979462862015, -0.024792836979031563, 0.04872164502739906, 0.026059577241539955, -0.05917894095182419, -0.011415015906095505, -0.024944854900240898, 0.00499876169487834, -0.06721813976764679, -0.03442658111453056, -0.002175490139052272, 0.0004200722905807197, -0.10891042649745941, 0.021674668416380882, 0.03700282797217369, -0.0014478170778602362, -0.013477527536451817, -0.02742406167089939, -0.01233668439090252, 0.02371596358716488, 0.04435785114765167, -0.03723753243684769], "EH_LABEL":[-0.03541884943842888, 0.012697970494627953, 0.07690317928791046, 0.10800164937973022, -0.033531446009874344, 0.010248170234262943, 0.08690237253904343, -0.018254421651363373, 0.006330807693302631, 0.054908059537410736, 0.05281105265021324, 0.01866377331316471, -0.03986826166510582, 0.012461063452064991, -0.0570770688354969, 0.010465170256793499, -0.0007985670818015933, 0.014928294345736504, -0.08143509179353714, -0.04576095566153526, -0.014382844790816307, 0.09261113405227661, -0.06843073666095734, 0.08642790466547012, 0.010645134374499321, 0.02887858636677265, 
-0.08228367567062378, 0.06679805368185043, 0.0023300996981561184, 0.02060936950147152, 0.06778941303491592, 0.10305429995059967, 0.06289057433605194, -0.020899023860692978, -0.024045836180448532, 0.0433543361723423, -0.0338776558637619, -0.05156976729631424, -0.02495928853750229, -0.060252029448747635, -0.1022094339132309, 0.014480574987828732, 0.0545940026640892, 0.04824232682585716, 0.06658189743757248, 0.11414545774459839, 0.08304191380739212, -0.03313761577010155, -0.056730128824710846, -0.005165864247828722, -0.00412571569904685, -0.0007486011018045247, -0.03322497382760048, 0.0425163209438324, 0.0785580724477768, 0.015084332786500454, 0.049294766038656235, 0.07518152892589569, -0.008224403485655785, -0.08819448202848434, -0.020814890041947365, 0.054976895451545715, -0.06431052833795547, 0.026952102780342102, -0.02861913852393627, -0.05228573456406593, -0.08044329285621643, 0.02844928950071335, 0.06669115275144577, -0.005387885496020317, 0.05081101134419441, -0.0627083107829094, -0.0785573348402977, -0.042252350598573685, -0.0632990375161171, -0.042457811534404755, -0.07097408920526505, 0.0032806433737277985, 0.039354246109724045, 0.054314617067575455, 0.04231691733002663, 0.00793430395424366, -0.06007056310772896, -0.06129273772239685, -0.008646488189697266, -0.024291129782795906, -0.06316813081502914, 0.02861824445426464, 0.029990797862410545, -0.014714590273797512, 0.005561451427638531, 0.06847269088029861, 0.05630529299378395, -0.015434914268553257, 0.08646618574857712, -0.0025325058959424496, -0.0046173883602023125, -0.04263639822602272, -0.021261105313897133, 0.02382785640656948], - "ExternalSymbol":[0.014755810610949993, -0.049842361360788345, -0.06733497977256775, 0.05401315540075302, 0.061938412487506866, 0.02437831088900566, -0.06823863834142685, 0.03685877099633217, 0.02961423434317112, -0.04944299906492233, -0.1271103173494339, 0.030452819541096687, 0.019848955795168877, -0.03185190260410309, 0.06586895883083344, 0.0007315169204957783, 
0.010839227586984634, -0.09547370672225952, -0.01799146644771099, -0.02204788289964199, 0.048699937760829926, 0.004187166225165129, 0.004053634125739336, -0.04464051127433777, -0.005158414598554373, -0.0416896678507328, -0.024279240518808365, -0.05358913540840149, -0.04719633609056473, -0.07180647552013397, 0.02559211477637291, 0.04657098650932312, 0.08353757858276367, -0.0023563469294458628, 0.046847302466630936, -0.03508693352341652, 0.0696689784526825, 0.054716791957616806, -0.012037037871778011, 0.019885245710611343, 0.01824580691754818, -0.06719563156366348, -0.05447190999984741, 0.08877509087324142, -0.01375679112970829, -0.014463561587035656, -0.049798283725976944, 0.06304343044757843, -0.007584648672491312, -0.016156170517206192, 0.024602508172392845, 0.004940119571983814, -0.04088609293103218, 0.0026271860115230083, 0.00787595845758915, -0.01889132149517536, -0.041029710322618484, 0.07343143969774246, -0.02505693957209587, -0.04825644940137863, 0.060728199779987335, 0.00460366066545248, 0.020744791254401207, 0.04238201677799225, -0.024090539664030075, -0.05792662873864174, 0.07639332860708237, -0.07511764764785767, -0.08259762078523636, 0.07901840656995773, -0.000285966758383438, 0.021390466019511223, -0.07818973809480667, -0.02385067008435726, -0.0014113716315478086, -0.055170729756355286, 0.00946732610464096, 0.02471417747437954, 0.07941421121358871, 0.006746167317032814, -0.06766024231910706, -0.089698426425457, 0.01933225803077221, -0.06994582712650299, -0.10149082541465759, 0.06007266044616699, -0.14545120298862457, -0.03447172790765762, 0.03258124738931656, 0.04966919496655464, 0.023691890761256218, -0.014501980505883694, 0.05896589905023575, 0.04760534316301346, -0.017742110416293144, 0.0019451226107776165, -0.01854461058974266, -0.04744676500558853, -0.017504630610346794, 0.05197983980178833], "FLDCW":[-0.0138143515214324, 0.021748993545770645, 7.070673746056855e-05, -0.0897645577788353, 0.09824047237634659, -0.07988506555557251, 
-0.03454058617353439, 0.0019847718067467213, 0.04983500763773918, 0.03934836760163307, -0.01007675752043724, -0.07798215001821518, -0.08095540851354599, 0.002752745756879449, 0.030696945264935493, 0.017224561423063278, 0.00200466881506145, 0.055515315383672714, -0.06178406998515129, -0.07683275640010834, 0.06503588706254959, 0.06047344580292702, 0.017141321673989296, -0.021984437480568886, -0.05550537258386612, -0.10371828079223633, 0.04531969875097275, 0.04299109801650047, 0.008607891388237476, -0.015554985031485558, -0.08462150394916534, 0.01943030022084713, -0.03486369550228119, -0.06457459926605225, -0.0051103211008012295, 0.05992105230689049, 0.0358397401869297, -0.04655934497714043, -0.018018357455730438, -0.057540085166692734, 0.0061888862401247025, -0.013676634058356285, -0.05362136662006378, 0.06076344475150108, 0.014500541612505913, 0.04466172680258751, 0.025775697082281113, 0.034106262028217316, -0.045596618205308914, 0.022729532793164253, 0.0068075573071837425, 0.033541467040777206, 0.04034329950809479, -0.05922241508960724, -0.11147011071443558, 0.10801365971565247, 0.028543133288621902, -0.076783187687397, 0.0018997815204784274, -0.030598029494285583, 0.04199691861867905, -0.09739390015602112, 0.06310229748487473, -0.03830089420080185, -0.03836864233016968, 0.02324736677110195, 0.10289694368839264, -0.08237223327159882, 0.09511970728635788, -0.022883199155330658, 0.07018155604600906, 0.021149639040231705, 0.06003378704190254, 0.020026177167892456, -0.019267164170742035, 0.06961971521377563, -0.004955677315592766, -0.07218261808156967, 0.08104820549488068, -0.0418921560049057, -0.0317075252532959, 0.020996741950511932, -0.009143776260316372, 0.05348548665642738, -0.0625229999423027, -0.06267517060041428, -0.09454416483640671, -0.043331023305654526, -0.06992270052433014, -0.027888890355825424, -0.08271876722574234, -0.05188243091106415, -0.010446823202073574, 0.05846165865659714, -0.010190286673605442, -0.03009830228984356, 0.03426814824342728, 
-0.03598400205373764, -0.1076725572347641, -0.028831692412495613], "FNSTCW":[-0.08537304401397705, 0.014420966617763042, 0.026950713247060776, -0.008387862704694271, -0.0038766334764659405, 0.026867343112826347, -0.030130255967378616, -0.04617878049612045, -0.007106459699571133, -0.0215947013348341, 0.007403566502034664, 0.032729458063840866, 0.0008728280663490295, -0.017559584230184555, 0.017324298620224, -0.014857987873256207, -0.03798896074295044, -0.05294371768832207, 0.05491216480731964, -0.04219334200024605, -0.024796022102236748, 0.033826109021902084, 0.04021430388092995, 0.015585671178996563, -0.025553781539201736, -0.011536196805536747, 0.021523986011743546, 0.01087264809757471, -0.023965656757354736, 0.021311553195118904, -0.0554395355284214, -9.890173532767221e-05, -0.0012819130206480622, -0.055725399404764175, 0.008443817496299744, 0.014645406976342201, 0.09493250399827957, 0.005851465743035078, -0.0346904918551445, -0.018780557438731194, -0.0024646760430186987, -0.04922417551279068, -0.025316428393125534, -0.047623440623283386, 0.04252983629703522, 0.008884137496352196, 0.024444259703159332, 0.11018849164247513, 0.06603030860424042, 0.10775407403707504, -0.06696148216724396, 0.07046543061733246, 0.03569186478853226, 0.06831049919128418, 0.10069368779659271, -0.07917457073926926, 0.07819988578557968, 0.0325605608522892, 0.028253860771656036, -0.03586380183696747, 0.08094784617424011, -0.08532348275184631, 0.08135068416595459, 0.08752897381782532, 0.07736475020647049, 0.03881741315126419, 0.01930568367242813, 0.01373430248349905, 0.07003094255924225, 0.021482432261109352, 0.0606292188167572, 0.005889599211513996, -0.06958997994661331, 0.04857232794165611, 0.09418252855539322, 0.030624384060502052, -0.05853968486189842, 0.0978643149137497, 0.042890243232250214, -0.06594833731651306, -0.00445757107809186, 0.028062766417860985, 0.04270890727639198, 0.049651019275188446, -0.10246159136295319, -0.04101993143558502, -0.06874924898147583, -0.047776881605386734, 
0.060615722090005875, 0.022016024217009544, -0.0476866140961647, -0.09320542216300964, -0.06186588481068611, 0.030679777264595032, -0.01664678566157818, -0.02508559450507164, -0.0495455376803875, 0.02986457757651806, 0.0242463406175375, -0.03076062723994255], - "FrameIndex":[0.05219179764389992, -0.01926516741514206, -0.021848104894161224, -0.008528115227818489, 0.02989117242395878, -0.012461756356060505, -0.050973404198884964, 0.026713935658335686, 0.01968700997531414, -0.001058116089552641, 0.009182002395391464, 0.03877940773963928, 0.070717453956604, -0.0028735792730003595, 0.0528000183403492, -0.015265910886228085, 0.007753959856927395, 0.01596899703145027, -0.07933179289102554, -0.02578687109053135, 0.02417992427945137, -0.03462255373597145, 0.04385964199900627, 0.004388607107102871, 0.03716951236128807, 0.04064105078577995, 0.07711678743362427, 0.0068300217390060425, -0.05443308874964714, -0.010809220373630524, -0.03124961629509926, 0.004911563824862242, -0.09201066941022873, 0.051436200737953186, 0.015400445088744164, 0.07804328948259354, -0.02971532940864563, -0.0003241244703531265, -0.02131350338459015, -0.09173687547445297, -0.01707594096660614, 0.0025449323002249002, 0.08701702952384949, 0.10675988346338272, -0.05082142353057861, 0.021581847220659256, -0.04104776680469513, 0.08402986079454422, -0.06109907105565071, 0.015201682224869728, 0.04374992102384567, -0.028573378920555115, -0.07767742872238159, 0.07216905802488327, 0.020538095384836197, -0.01229778677225113, 0.003033912740647793, -0.0007747758063487709, -0.09185474365949631, -0.02851664461195469, -0.009441743604838848, 0.05500328913331032, -0.002983751241117716, -0.09198789298534393, -0.051319632679224014, -0.054626885801553726, -0.020108554512262344, 0.0010591084137558937, -0.009138713590800762, 0.07223176956176758, -0.022099260240793228, 0.016025206074118614, -0.05320229008793831, 0.025131219998002052, 0.06626036763191223, 0.07639450579881668, -0.027084894478321075, 0.06581225991249084, 
-0.017618829384446144, -0.03859466314315796, -0.03385398909449577, 0.018783841282129288, -0.0730312392115593, 0.06957981735467911, -0.03065340407192707, 0.020685074850916862, -0.05311165004968643, 0.09466810524463654, 0.00955914705991745, -0.013919183053076267, -0.05540250986814499, -0.03087283857166767, -0.009688221849501133, 0.016239993274211884, -0.012926830910146236, -0.027712060138583183, -0.06342892348766327, -0.011996395885944366, 0.05536693334579468, -0.04359230771660805], "FsFLD":[-0.0508677139878273, -0.05399654433131218, -0.07149481028318405, -0.047971777617931366, 0.0019320917781442404, -0.007547610439360142, 0.0815814733505249, -0.12202084064483643, -0.08665104955434799, 0.03356856107711792, -0.15713559091091156, -0.0400867722928524, -0.006232412997633219, 0.044278621673583984, 0.09549921005964279, -0.029399411752820015, 0.01864752173423767, -0.04044967144727707, 0.05652021989226341, -0.09881851822137833, 0.025765251368284225, -0.02329906076192856, -0.06028103083372116, 0.09247462451457977, -0.04210466891527176, 0.03263019770383835, -0.03578515350818634, 0.0314578041434288, 0.003650028258562088, 0.04645871743559837, -0.010650137439370155, 0.015904754400253296, 0.018990037962794304, -0.005266033578664064, 0.038479309529066086, 0.008642041124403477, -0.049301791936159134, 0.09484748542308807, 0.005372038576751947, -0.08711376041173935, 0.07584445923566818, 0.09458201378583908, -0.00032702009775675833, 0.048093944787979126, -0.08043119311332703, 0.049779392778873444, -0.006967591121792793, -0.07319328933954239, 0.01582382619380951, -0.006244257558137178, -0.011940727941691875, -0.0013992231106385589, -0.028953444212675095, 0.010995968244969845, -0.005534093361347914, -0.04907146096229553, -0.0039899349212646484, 0.05501222237944603, 0.041574396193027496, 0.030038336291909218, -0.0402531623840332, 0.07675039023160934, 0.01103806123137474, -0.006072944961488247, -0.025336718186736107, 0.06967771798372269, -0.025075508281588554, 0.0031819106079638004, 
-0.015812508761882782, -0.12114851176738739, 0.07704214751720428, 0.1273191273212433, -0.014406625181436539, -0.031106390058994293, -0.0602225735783577, 0.016253838315606117, -0.059025105088949203, -0.04163780063390732, 0.01571997068822384, 0.025686416774988174, 0.032261066138744354, -0.016690189018845558, 0.014042876660823822, 0.009416786953806877, -0.012661219574511051, 0.013285082764923573, 0.03095356747508049, 0.008239349350333214, 0.0444798618555069, -0.05153216794133186, -0.010029821656644344, -0.015202880837023258, 0.06329496204853058, -0.0590473897755146, 0.08585292845964432, -0.08594027906656265, 0.06057215481996536, 0.01079416275024414, -0.04006461799144745, 0.029236430302262306], - "GlobalAddress":[0.021709734573960304, -0.03253590315580368, -0.04603651538491249, -0.02350226789712906, 0.02841794677078724, 0.01920732669532299, 0.053104616701602936, 0.03941836208105087, -0.01895466446876526, -0.030471740290522575, 0.010719750076532364, 0.020050356164574623, 0.03648754581809044, -0.021573888137936592, -0.02554452419281006, -3.637039117165841e-05, 0.05989491194486618, -0.006903402041643858, -0.08826262503862381, -0.028047384694218636, -0.04230065643787384, -0.05190899223089218, 0.06145390123128891, 0.0005839569494128227, -4.391977927298285e-05, -0.01880771853029728, 0.09660127758979797, 0.04333353415131569, 0.06461602449417114, -0.06010710820555687, -0.0690189078450203, 0.04574553668498993, -0.07640431076288223, 0.01879746839404106, 0.02076675370335579, 0.04869573190808296, 0.025147439911961555, 0.05311164632439613, 0.05711919441819191, 0.049520380795001984, 0.041169121861457825, -0.0603964701294899, -0.04195070639252663, 0.07676130533218384, -0.015161959454417229, 0.02903268299996853, -0.027548301964998245, 0.04705912992358208, -0.11194053292274475, -0.008245207369327545, -0.07792827486991882, -0.019468743354082108, 0.05482499673962593, -0.0028855702839791775, 0.05478052794933319, 0.07484771311283112, -0.011742575094103813, 0.00923923309892416, 
-0.05074375122785568, 0.06956734508275986, -0.045990440994501114, 0.007280972320586443, 0.040920473635196686, -0.09143709391355515, -0.06105270981788635, -0.0021254979074001312, -0.09519167989492416, 0.06324268877506256, -0.0693386048078537, -0.05100148543715477, 0.010643817484378815, -0.008162467740476131, -0.08811189234256744, -0.08640385419130325, 0.0077143507078289986, 0.030832089483737946, -0.01504515577107668, 0.07277517020702362, 0.02581198327243328, -0.052599068731069565, -0.06478387117385864, 0.01634707674384117, -0.021173706278204918, 0.030482977628707886, -0.09826494008302689, 0.07716016471385956, -0.10845024883747101, 0.04479274898767471, -0.015128640457987785, -0.03491876646876335, 0.05239150673151016, -0.03427724912762642, 0.06768845021724701, -0.04174086079001427, -0.05136744678020477, 0.0037109211552888155, -0.030324269086122513, -0.06928850710391998, -0.0395960658788681, 0.07726000994443893], "IDIV":[-0.03631015121936798, -0.07882149517536163, -0.010781447403132915, -0.025117948651313782, 0.01618420146405697, 0.044446997344493866, 0.011386583559215069, -0.00582836102694273, -0.012903614901006222, 0.006322081200778484, -0.07392880320549011, -0.1300479620695114, -0.05186808854341507, -0.06542935222387314, 0.08297666162252426, 0.03790606930851936, -0.07716395705938339, 0.02288512885570526, -0.038660015910863876, -0.04705967381596565, -0.00015759489906486124, -0.06133948266506195, -0.022438891232013702, -0.012017307803034782, 0.01929904706776142, 0.007114879786968231, 0.00567955756559968, -0.041199274361133575, 0.08304950594902039, 0.044402915984392166, -0.10634922981262207, -0.009510381147265434, 0.009772839024662971, -0.048219580203294754, -0.0321214459836483, 0.008684953674674034, 0.009846106171607971, 0.011280585080385208, 0.0310650784522295, 0.05677618831396103, 0.025418052449822426, -0.022629115730524063, 0.0074129728600382805, 0.1081111952662468, -0.03284893184900284, 0.002745774807408452, 0.05030296742916107, 0.04322626441717148, 
0.005321172997355461, 0.03260405734181404, -0.051505692303180695, -0.033541131764650345, -0.03955534100532532, 0.047906432300806046, 0.02181984856724739, -0.0026405092794448137, 0.03350621834397316, -0.10710552334785461, -0.01533215120434761, -0.06872875243425369, -0.015413723886013031, -0.007149300072342157, -0.03660491481423378, -0.003503897227346897, -0.02898445539176464, 0.040071532130241394, 0.019684670493006706, -0.10101661086082458, -0.08199643343687057, 0.05637385696172714, -0.03792939707636833, 0.03106122836470604, -0.0590706542134285, -0.03607700765132904, -0.09597010910511017, -0.005815848242491484, 0.017992950975894928, 0.0007907312246970832, 0.04653536528348923, -0.03997295722365379, 0.006737773306667805, 0.11695551127195358, 0.022216010838747025, 0.041878726333379745, -0.035456813871860504, 0.04327021911740303, -0.03799387812614441, 0.10658515244722366, 0.010188632644712925, 0.09275273978710175, 0.09797771275043488, -0.12400814890861511, 0.03475511074066162, -0.08061601221561432, 0.022533612325787544, -0.11562027782201767, -0.026964085176587105, 0.08614259958267212, -0.025526022538542747, 0.040927182883024216], "ILD_Fp":[0.01509383600205183, -0.044326793402433395, -0.051242612302303314, -0.053859174251556396, -0.013097256422042847, -0.06370041519403458, 0.06120477616786957, 0.050328709185123444, -0.04184471070766449, 0.023432370275259018, -0.06435256451368332, 0.02055867575109005, 0.08239544183015823, 0.012251744978129864, -0.05063817650079727, 0.04293346777558327, -0.05919358506798744, -0.03159564360976219, -0.0037220751401036978, -0.001002405071631074, -0.026786377653479576, -0.07405146211385727, 0.044357798993587494, 0.08067265897989273, -0.05229390412569046, -0.06903751194477081, 0.010448710061609745, 0.006885232869535685, -0.052135784178972244, 0.08535145968198776, 0.041820794343948364, -0.020588336512446404, 0.07256042212247849, -0.017755955457687378, -0.032768987119197845, 0.06633710861206055, -0.03427698463201523, -0.10930930078029633, 
0.05371936410665512, -0.06794329732656479, -0.014769122004508972, -0.07577606290578842, 0.07853815704584122, -0.09360899031162262, 0.05865737050771713, -0.034065186977386475, 0.05096115916967392, 0.0888199508190155, -0.03904300555586815, 0.03125728294253349, -0.0634637326002121, 0.03385297581553459, 0.027269205078482628, -0.07597903162240982, 0.008366324007511139, -0.03017764538526535, 0.011727942153811455, -0.04941355064511299, 0.027957690879702568, 0.09743025153875351, 0.004836047999560833, -0.028614182025194168, 0.016423141583800316, 0.0895770713686943, 0.025168858468532562, 0.030979957431554794, 0.016665387898683548, 0.025412173941731453, -0.035893514752388, -0.05403519794344902, 0.02931641787290573, 0.07742571830749512, -0.07045850157737732, -0.03433118015527725, -0.03651195392012596, -0.04036823660135269, -0.08663841336965561, 0.05561026185750961, 0.06927209347486496, -0.010819001123309135, -0.10697789490222931, 0.009881369769573212, 0.055065181106328964, -0.06379911303520203, 0.04137800633907318, 0.030417418107390404, -0.03515362739562988, -0.09139228612184525, 0.029920026659965515, 0.027388064190745354, -0.06739232689142227, 0.07639766484498978, -0.044223885983228683, 0.02472294308245182, -0.052025098353624344, 0.014643780887126923, 7.120784721337259e-05, 0.018760213628411293, -0.002873474732041359, 0.015561423264443874], "IMPLICIT_DEF":[-0.026583483442664146, -0.03995991870760918, 0.03633055090904236, -0.04622741788625717, -0.02326572686433792, 0.02231338992714882, -0.014788332395255566, -0.09906739741563797, 0.022785643115639687, -0.014632754027843475, -0.1041543111205101, 0.05013664439320564, -0.08690599352121353, -0.08063319325447083, 0.030247388407588005, -0.09707676619291306, 0.03499408811330795, 0.012669776566326618, 0.06481463462114334, -0.040453050285577774, -0.0489707849919796, -0.07584276050329208, 0.001047363504767418, 0.08496157824993134, 0.02357148937880993, -0.06866959482431412, 0.09267362207174301, 0.030527250841259956, 
-0.031355831772089005, 0.02419896423816681, -0.02442512847483158, 0.029297800734639168, 0.10321355611085892, 0.06579483300447464, -0.012722077779471874, 0.10042434185743332, -0.004708406049758196, 0.007217984646558762, 0.0753282904624939, -0.07088368386030197, -0.07383686304092407, 0.06410741060972214, 0.06312107294797897, 0.06989452987909317, 0.03766098991036415, -0.0008440924575552344, -0.023516006767749786, -0.04153933748602867, 0.07342316210269928, 0.05416297912597656, -0.02841850183904171, 0.04128013551235199, -0.001023625023663044, 0.005061942618340254, -0.06027042120695114, 0.025808431208133698, 0.027118714526295662, -0.08965771645307541, 0.012222534976899624, 0.008590211160480976, -0.01785023882985115, 0.03389652445912361, 0.0038459128700196743, 0.021088456735014915, -0.060241442173719406, 0.052924126386642456, -0.03849414363503456, 0.0044007860124111176, 0.05139085650444031, -0.06002991273999214, 0.026294095441699028, 0.06567239761352539, 0.1145782321691513, -0.02774081937968731, -0.07959162443876266, -0.00901349913328886, -0.09212079644203186, -0.016664501279592514, -0.019095804542303085, 0.05008011311292648, -0.016630882397294044, -0.007292845752090216, 0.01243519689887762, 0.011623953469097614, -0.0202464796602726, 0.08120717853307724, 0.04192841053009033, -0.014358888380229473, 0.0402902215719223, -0.05741799250245094, 0.0023748986423015594, -0.0007613254711031914, -0.11052780598402023, -0.08283583074808121, -0.018524790182709694, -0.09601832926273346, 0.037600427865982056, -0.06403559446334839, -0.08838459849357605, 0.01904650405049324], @@ -75,12 +71,10 @@ "INC":[-0.04204729199409485, -0.04558457434177399, -0.004308773670345545, 0.08560862392187119, -0.025844622403383255, -0.01385454647243023, -0.06715847551822662, 0.04059276729822159, 0.0008142509614117444, -0.04987747594714165, 0.05252164602279663, -0.07536070048809052, 0.012251293286681175, -0.01428443193435669, 0.028742481023073196, -0.024608345702290535, 0.009724774397909641, 
-0.024144234135746956, -0.04345421493053436, -0.03454094007611275, -0.03657921776175499, -0.025569358840584755, 0.04140102490782738, -0.02267373353242874, -0.05346262827515602, -0.07470668852329254, -0.03458420932292938, -0.015982985496520996, 0.013558092527091503, -0.029305797070264816, 0.026653757318854332, -0.00041234202217310667, 0.038508299738168716, 0.08509717136621475, 0.0016276738606393337, -0.013578594662249088, 0.05669381096959114, 0.0274334829300642, 0.023921431973576546, -0.02701006643474102, -0.09357035905122757, 0.07844959199428558, -0.03195708245038986, 0.044196177273988724, 0.017355425283312798, -0.04172753170132637, -0.07773707062005997, 0.018204662948846817, -0.07242465019226074, 0.07735569030046463, 0.03859752044081688, 0.08490721136331558, 0.04661087319254875, 0.015468046069145203, 0.02267235703766346, -0.030244702473282814, -0.043930262327194214, -0.015585970133543015, -0.004605699330568314, 0.0052457586862146854, -0.027553195133805275, -0.06406774371862411, 0.008009923622012138, -0.09624558687210083, 0.07006736844778061, 0.052846722304821014, -0.029392898082733154, -0.0659954622387886, -0.10725440829992294, 0.04428407922387123, 0.02606845460832119, 0.018936248496174812, -0.013534934259951115, 0.03338829427957535, -0.06049540638923645, 0.007389454171061516, 0.030835872516036034, -0.026952944695949554, -0.008518273010849953, 0.07688802480697632, 0.03663042560219765, -0.09961165487766266, -0.02765841968357563, 0.06263019144535065, -0.003026304766535759, -0.0023868512362241745, -0.052803706377744675, 0.04688272252678871, 0.08415349572896957, -0.044724639505147934, 0.01759890839457512, 0.022962408140301704, 0.00944716576486826, -0.084384024143219, -0.02845100499689579, -0.05094959959387779, -0.08001884073019028, 0.0449872724711895, -0.05161838233470917, 0.015422065742313862], "INLINEASM":[0.09296883642673492, -0.007579821161925793, 0.05054628103971481, 0.0011402746895328164, -0.02369365282356739, -0.040429845452308655, 0.048763860017061234, 
-0.012725423090159893, -0.017820369452238083, -0.0700153335928917, -0.00037883210461586714, 0.06301063299179077, 0.0503254272043705, 0.023893356323242188, -0.07308998703956604, 0.058056626468896866, -0.002504807896912098, -0.03528450429439545, -0.0775352418422699, -0.08423604816198349, 0.01841139607131481, 0.07128658145666122, 0.01363592129200697, 0.05391324311494827, 0.04803359508514404, 0.06145099550485611, -0.03153276443481445, 0.019207997247576714, 0.07138897478580475, 0.06972941011190414, 0.06482893973588943, -0.019937975332140923, -0.00694684125483036, 0.0624234639108181, 0.08495642989873886, 0.017590269446372986, -0.0075670769438147545, 0.05114367976784706, 0.031221428886055946, -0.07108655571937561, -0.018287384882569313, 0.035706836730241776, -0.0794610008597374, -0.03627452626824379, -0.06174106150865555, -0.036826081573963165, -0.030408767983317375, 0.008271732367575169, -0.09423738718032837, 0.004248321522027254, 6.044749170541763e-05, 0.011095447465777397, -0.10245273262262344, -0.07278212904930115, -0.00845671258866787, 0.008961541578173637, 0.019341865554451942, 0.010205359198153019, 0.0724569708108902, -0.08050914853811264, -0.057010360062122345, 0.05053231865167618, -0.04844024032354355, 0.057458631694316864, 0.007486356887966394, -0.029497744515538216, 0.009812748059630394, -0.05314056575298309, 0.11012034863233566, -0.0647352784872055, 0.017479702830314636, -0.027027146890759468, -0.015448061749339104, 0.06321517378091812, -0.06948030740022659, 0.030430838465690613, -0.022251488640904427, -0.0358838327229023, 0.020705783739686012, -0.10970951616764069, -0.07724311202764511, 0.03224516287446022, 0.004828427918255329, 0.07738938182592392, -0.0036471053026616573, 0.06867322325706482, -0.07092054188251495, -0.024759342893958092, -0.054835252463817596, 0.019259851425886154, 0.011149682104587555, -0.09652992337942123, 0.050764426589012146, -0.0809553936123848, -0.04605351760983467, 0.0399462915956974, 0.05396333709359169, -0.01706104166805744, 
-0.031266387552022934, 0.020599452778697014], "IST_Fp":[-0.046584248542785645, -0.07452045381069183, 0.03998621925711632, 0.03091888502240181, 0.016272397711873055, -0.00985297653824091, -0.007199955638498068, -0.03536335751414299, 0.01673988439142704, 0.07562774419784546, 0.023876583203673363, -0.008683494292199612, 0.04009688273072243, -0.03663905709981918, -0.014492983929812908, 0.07349997758865356, 0.028999919071793556, -0.07499339431524277, -0.03727814927697182, -0.046455491334199905, -0.032447993755340576, 0.02374599315226078, -0.044662121683359146, -0.025333719328045845, 0.037562429904937744, 0.0006656686891801655, -0.00804421491920948, 0.06697870045900345, 0.04367857426404953, -0.0583018884062767, 0.03050180710852146, 0.053111929446458817, -0.04168881103396416, -0.027295507490634918, 0.057777389883995056, 0.08833678811788559, -0.026598922908306122, 0.005393106956034899, -0.05517015606164932, -0.0731138065457344, 0.07386088371276855, -0.07228095829486847, 0.023828018456697464, -0.0025013380218297243, -0.012031037360429764, 0.029700662940740585, -0.101964570581913, 0.0899822935461998, 0.013285316526889801, 0.002607472240924835, 0.04784732311964035, -0.044669900089502335, -0.04348702356219292, -0.07007527351379395, -0.016267215833067894, 0.059609103947877884, -0.036534957587718964, 0.013465121388435364, 0.10186120122671127, 0.015473871491849422, -0.08443709462881088, -0.004981503821909428, 0.06996916979551315, 0.011159068904817104, -0.07315052300691605, 0.024891534820199013, 0.0426689088344574, 0.008847315795719624, -0.06540054082870483, -0.09095568209886551, 0.053956128656864166, -0.010535894893109798, 0.035168495029211044, 0.04921877756714821, -0.07781729847192764, 0.006958760786801577, -0.05714801698923111, -0.06458019465208054, -0.055241748690605164, -0.007552466355264187, -0.02490214817225933, -0.014270482584834099, 0.03710750862956047, 0.003406278323382139, -0.044638775289058685, -0.09159127622842789, -0.025353819131851196, -0.07952282577753067, 
-0.02874225378036499, -0.06654132902622223, 0.0031955954618752003, 0.0602104589343071, -0.09261002391576767, -0.06175351142883301, 0.01194009743630886, -0.0348934531211853, 0.04460763558745384, -0.08773446083068848, 0.04335169121623039, 0.054603610187768936], - "Immediate":[-0.039664868265390396, 0.028720445930957794, -0.057207897305488586, 0.04179477319121361, 0.04477043077349663, 0.020050648599863052, -0.056656818836927414, -0.025030966848134995, -0.04394019395112991, 0.04849115386605263, 0.012325904332101345, 0.06731707602739334, 0.04568001255393028, -0.04773757979273796, -0.012142524123191833, -0.03986259177327156, -0.027249159291386604, -0.04930245876312256, -0.10542229562997818, -0.05678592994809151, -0.038303568959236145, -0.07283245027065277, 0.0217409897595644, -0.01139344647526741, 0.006936497986316681, -0.04702157527208328, 0.09977010637521744, -0.035237088799476624, 0.028822069987654686, -0.0691431537270546, -0.0829710066318512, -0.1289154589176178, -0.08470306545495987, -0.06731563061475754, 0.06642980873584747, 0.026025734841823578, -0.04049745202064514, 0.030080674216151237, 0.04203929752111435, 0.06834205985069275, 0.04315062239766121, 0.00788890291005373, 0.03426999971270561, 0.08819636702537537, 0.004112098831683397, 0.03392210975289345, 0.010541473515331745, 0.08045777678489685, -0.02914009988307953, 0.0624285452067852, 0.03299122676253319, -0.05355033650994301, -0.07568570226430893, 0.08106201142072678, 0.0376802459359169, -0.04886564612388611, -0.10992937535047531, -0.00761816743761301, -0.014918084256350994, 0.03816765174269676, -0.04981819912791252, 0.00031993765151128173, 0.011382698081433773, -0.029902901500463486, -0.0117422454059124, -0.057965945452451706, -0.09519924223423004, 0.020727403461933136, -0.04526710882782936, 0.09883677959442139, 0.018033087253570557, -0.003035350237041712, -0.06968960911035538, -0.09893210977315903, -0.01264366414397955, 0.017397744581103325, -0.08519260585308075, 0.09382850676774979, -0.055508699268102646, 
-0.026548130437731743, -0.013868317008018494, -0.03162496164441109, 0.06089535728096962, -0.01583624631166458, -0.060260944068431854, 0.06709896773099899, -0.09333796799182892, -0.02887417934834957, -0.03424007445573807, -0.01687423326075077, 0.11968979239463806, -0.08361987769603729, 0.09037765115499496, -0.04322688281536102, -0.040831610560417175, -0.061376459896564484, -0.03485504537820816, 0.016033072024583817, 0.004106835462152958, -0.03354674205183983], "Int_MemBarrier":[0.0418969988822937, -0.06285926699638367, -0.018717624247074127, -0.0031687396112829447, 0.04023218899965286, 0.08492552489042282, -0.06942103803157806, 0.005588027182966471, -0.08964942395687103, 0.055396437644958496, -0.06732998788356781, 0.06981600075960159, -0.05258888751268387, -0.06051918491721153, 0.02948639541864395, -0.04473342001438141, 0.01574157550930977, -0.04423875734210014, -0.053338322788476944, 0.008577392436563969, 0.10632415115833282, 0.040030092000961304, 0.02552260458469391, 0.026821544393897057, -0.05510386824607849, 0.05976655334234238, -0.0008300095796585083, 0.06861157715320587, -0.049591872841119766, -0.07650840282440186, -0.004643433261662722, -0.03990425914525986, 0.06366871297359467, -0.014906020835042, -0.06371121108531952, 0.0194997675716877, -0.07784571498632431, 0.029953552410006523, 0.06530797481536865, -0.09173597395420074, 0.021494632586836815, 0.052978403866291046, -0.001283245743252337, -0.05061378329992294, 0.04639996960759163, 0.06478390842676163, -0.015909312292933464, 0.013739313930273056, -0.06675873696804047, -0.0704226866364479, 0.020883914083242416, 0.07323179394006729, -0.0010066484101116657, -0.002373248105868697, -0.07056596130132675, 0.024577656760811806, 0.04880139231681824, -0.038608577102422714, 0.07695038616657257, 0.002806240925565362, 0.006876204162836075, 0.006961337756365538, 0.059363361448049545, 0.021191507577896118, -0.06366844475269318, -0.015020458959043026, -0.0815785601735115, 0.004222068004310131, -0.07691111415624619, 
0.02711009606719017, 0.014720573090016842, 0.022912023589015007, 0.05272422730922699, 0.08111070841550827, -0.018083568662405014, -0.0418405644595623, 0.08496879786252975, -0.04420621693134308, 0.090696781873703, -0.02872851863503456, -0.024066468700766563, 0.07789512723684311, -0.012021118775010109, 0.041637614369392395, 0.07615016400814056, -0.042834896594285965, 0.05792360380291939, -0.051077719777822495, -0.05241186544299126, 0.006270663347095251, -0.008865885436534882, -0.09101007878780365, 0.009276151657104492, 0.036050815135240555, -0.06729964166879654, -0.014552133157849312, -0.06943532824516296, -0.023805340752005577, -0.058313168585300446, -0.04949163272976875], "JCC_":[-0.03625413775444031, -0.041811503469944, -0.07486920803785324, -0.05052778869867325, 0.021635157987475395, -0.045879144221544266, 0.014834613539278507, -0.03941917419433594, -0.010327291674911976, -0.08194752782583237, 0.049111511558294296, 0.05970187485218048, 0.03878019377589226, -0.08208157867193222, 0.11816514283418655, -0.0021148237865418196, 0.022616155445575714, 0.02145639806985855, -0.056387223303318024, -0.07890307158231735, 0.049655016511678696, -0.09555239230394363, -0.07599814981222153, 0.04143097624182701, -0.029399001970887184, 0.01379090640693903, 0.04894237220287323, 0.04915700852870941, 0.020924754440784454, 0.11983200162649155, -0.045743830502033234, 0.04826069250702858, 0.06473162770271301, 0.032176557928323746, 0.012342192232608795, 0.03632035106420517, -0.011231182143092155, 0.03319219872355461, 0.012383898720145226, 0.017726020887494087, -0.027707353234291077, 0.052987076342105865, -0.06459034234285355, 0.03180805966258049, 0.038370322436094284, -0.018640436232089996, -0.05121193453669548, -0.052741218358278275, 0.0953487753868103, 0.0914265364408493, 0.08409767597913742, -0.009599939920008183, 0.02045055478811264, 0.009363643825054169, -0.00872961338609457, -0.08178623765707016, -0.008178372867405415, -0.005903102457523346, 0.05836755037307739, 0.011602274142205715, 
-0.02761419117450714, 0.016957316547632217, 0.04471946507692337, 0.005247261840850115, -0.05416998639702797, 0.00770663283765316, -0.06152857095003128, 0.021657155826687813, -0.04485960677266121, -0.0008541923016309738, 0.053551655262708664, 0.062185727059841156, -0.012641278095543385, -0.020507624372839928, -0.02900690771639347, 0.019629495218396187, 0.05620177462697029, -0.07772354781627655, -0.025509009137749672, 0.01923682540655136, 0.03035508468747139, 0.018665296956896782, 0.013450516387820244, 0.06740278005599976, 0.013274379074573517, 0.011593983508646488, 0.02331095188856125, 0.048694003373384476, 0.05861792340874672, -0.021130137145519257, 0.02437412552535534, 0.059087324887514114, 0.024816056713461876, -0.050772879272699356, -0.01114521361887455, -0.028665395453572273, -0.09630053490400314, 0.0039062038995325565, -0.08236120641231537, 0.019473683089017868], "JMP":[-0.021766331046819687, -0.021576769649982452, -0.03795000538229942, 0.10449998080730438, -0.037742577493190765, -0.009156269021332264, 0.015289359726011753, 0.03519408404827118, -0.034353505820035934, 0.03226960077881813, 0.07340928167104721, 0.06086661294102669, 0.05736850947141647, -0.01725650765001774, -0.06702736765146255, -0.014972181059420109, 0.03435607627034187, 0.012023050338029861, 0.03370668366551399, -0.022338073700666428, -0.08280093967914581, -0.08060947060585022, 0.012210523709654808, -0.08165933936834335, 0.0016056479653343558, 0.015586943365633488, 0.11792927235364914, 0.06917431950569153, 0.02870137430727482, 0.01961304247379303, -0.027661900967359543, 0.10504695773124695, -0.03640349581837654, -0.01896090805530548, -0.011636625044047832, -0.04474593698978424, -0.029941411688923836, -0.058342345058918, 0.05885041877627373, 0.05553867667913437, 0.03953809291124344, 0.06787443161010742, 0.002061075298115611, 0.027305128052830696, 0.05792280659079552, 0.08001891523599625, 0.026575665920972824, -0.0171738862991333, -0.010685772635042667, -0.05422135442495346, 0.03660969436168671, 
0.03091355785727501, -0.05900857225060463, 0.08500046283006668, -0.08218419551849365, 0.061078935861587524, 0.018783383071422577, 0.047520000487565994, -0.00014930205361451954, 0.002577823819592595, -0.06816059350967407, 0.041743114590644836, 0.03372296690940857, 0.016127480193972588, -0.07235685735940933, 0.024466760456562042, -0.03468412905931473, 0.037008773535490036, -0.060657840222120285, 0.016427740454673767, 0.08229042589664459, -0.061172664165496826, -0.009794612415134907, -0.024358782917261124, -0.06573519110679626, 0.09360098838806152, -0.07428182661533356, -0.02529928646981716, 0.09198813885450363, 0.025180503726005554, 0.03200048953294754, 0.018081925809383392, 0.0034776402171701193, 0.07848992198705673, -0.00043209362775087357, -0.01768604852259159, -0.043686315417289734, 0.04550321400165558, 0.11878672987222672, -0.008190528489649296, 0.003286525374278426, 0.06845948845148087, 0.04892893135547638, -0.053277406841516495, -0.016919657588005066, 0.032096169888973236, 0.02839065156877041, -0.01713993400335312, -0.15167304873466492, -0.02013365738093853], "JMP_":[-0.014233234338462353, -0.0260892603546381, -0.13750334084033966, -0.050227466970682144, -0.042988359928131104, -0.027947310358285904, 0.08639533072710037, -0.16317786276340485, -0.03907149285078049, -0.05328908935189247, -0.03975899517536163, 0.04182944446802139, -0.010540750809013844, -0.11645861715078354, -0.012753792107105255, 0.002367585664615035, -0.05188040807843208, 0.0033823091071099043, -0.01240340806543827, -0.06099176034331322, -0.0015601427294313908, -0.11171454191207886, -0.04928319901227951, 0.05990544706583023, 0.015553089790046215, 0.04499414563179016, -0.034520961344242096, 0.07318194955587387, 0.013978325761854649, 0.07317976653575897, -0.029100794345140457, -0.09544635564088821, 0.030067358165979385, 0.057544808834791183, 0.005057932808995247, 0.005621553864330053, -0.03627946600317955, -0.0391962006688118, 0.03113878332078457, -0.02958066016435623, 0.012381716631352901, 
0.011978821828961372, 0.13839371502399445, 0.010590317659080029, 0.06677765399217606, 0.046147286891937256, -0.05033441260457039, -0.020135121420025826, 0.032657306641340256, -0.05044032260775566, 0.05499301478266716, 0.07406507432460785, -0.0011679750168696046, 0.000989275984466076, 0.029161963611841202, 0.02679276280105114, 0.024040302261710167, 0.0710899606347084, -0.0035478041972965, -0.03730632737278938, -0.014350024051964283, 0.1638166308403015, -0.10163120925426483, 0.02900329977273941, -0.05366139113903046, 0.07186686992645264, -0.041340481489896774, 0.0401119627058506, -0.002295189071446657, -0.07949572801589966, -0.011504769325256348, 0.10675538331270218, -0.012056156061589718, -0.00748586468398571, -0.039624687284231186, -0.03555607795715332, -0.06799864768981934, -0.04550764709711075, -0.03302829712629318, -0.008404256775975227, 0.10563746094703674, -0.026095328852534294, 0.07613116502761841, 0.02101682499051094, 0.018749620765447617, 0.0056787943467497826, 0.005889789201319218, 0.03994893655180931, 0.05512934923171997, -0.004684743471443653, -0.01083239447325468, 0.0003112686099484563, 0.024348445236682892, -0.02665846049785614, 0.0064091463573277, -0.02719639055430889, 0.11076066642999649, -0.0014569570776075125, 0.0050220787525177, 0.032427236437797546], - "JumpTableIndex":[-0.007416237145662308, 0.0038157713133841753, 0.05180662125349045, 0.03776901960372925, -0.011749244295060635, -0.02952706068754196, -0.06646136939525604, 0.02088487148284912, -0.001927916775457561, 0.018895410001277924, 0.0509350448846817, 0.057210080325603485, -0.0476078987121582, -0.00016809302906040102, -0.02341553010046482, -0.06734820455312729, 0.02047930844128132, 0.009282611310482025, 0.0038133300840854645, 0.0020261742174625397, -0.09253961592912674, 0.0766557827591896, -0.049570225179195404, -0.11510220915079117, -0.009570423513650894, -0.007274465169757605, 0.07750000059604645, 0.02489926479756832, -0.08297400176525116, 0.048176445066928864, 0.03797437995672226, 
0.060842450708150864, 0.020265065133571625, -0.03559373319149017, 0.03493893891572952, -0.0036544676404446363, 0.010211148299276829, -0.06471849977970123, -0.034595828503370285, -0.05245388671755791, -0.0014119939878582954, 0.008752748370170593, -0.020637203007936478, 0.053244929760694504, 0.052053239196538925, 0.014706660993397236, 0.02803724631667137, -0.07983336597681046, 0.03106858767569065, 0.001688914722763002, -0.07647732645273209, -0.028148295357823372, -0.0528123639523983, 0.08006428182125092, -0.06398879736661911, -0.033476538956165314, 0.05217607319355011, -0.03093232959508896, 0.044230975210666656, 0.05123162269592285, -0.05225585401058197, 0.06976816058158875, -0.0014492797199636698, 0.03833283483982086, 0.08385992050170898, -0.04722217097878456, -0.00226160092279315, -0.027254855260252953, -0.09566919505596161, 0.02109321765601635, -0.032354824244976044, 0.08032239973545074, -0.046937450766563416, -0.004326784983277321, -0.026024870574474335, 0.12039119750261307, 0.1016048863530159, 0.06808122247457504, -0.012297546491026878, -0.06450799852609634, 0.015778351575136185, 0.012280710972845554, 0.04002666845917702, 0.04792468994855881, -0.06248988211154938, -0.054222140461206436, 0.018379682675004005, -0.0029111658222973347, 0.016062958166003227, 0.09880068898200989, 0.03846307471394539, 0.04975416138768196, 0.07305088639259338, -0.020941948518157005, -0.020897891372442245, 0.03872328996658325, -0.05682756006717682, 0.09583723545074463, 0.0028475294820964336, -0.05127262324094772], "LCMPXCHG":[0.0649508610367775, -0.04321656376123428, 0.08405561745166779, -0.07786691188812256, -0.05277935788035393, 0.011031142435967922, -0.0015533932019025087, 0.08730415254831314, -0.004414519295096397, 0.04040057212114334, -0.005748671013861895, -0.013907546177506447, 0.1028006374835968, 0.09900037944316864, -0.06475479900836945, 0.024365412071347237, -0.0727076306939125, 0.06610138714313507, -0.026073187589645386, 0.08258920162916183, -0.007938066497445107, 
0.07641425728797913, 0.10221290588378906, 0.029036179184913635, -0.024506229907274246, 0.00953623466193676, -0.03283938392996788, -0.07194274663925171, -0.023513879626989365, -0.017550935968756676, -0.037860531359910965, 0.042062658816576004, 0.0501263290643692, 0.02325640618801117, 0.0018605751683935523, 0.012687316164374352, -0.016979143023490906, -0.059858907014131546, -0.07078705728054047, 0.033630695194005966, 0.036799900233745575, -0.03821465000510216, -0.059619177132844925, -0.06309511512517929, 0.0019384543411433697, -0.053095221519470215, 0.00571654736995697, 0.07134073972702026, -0.02115899883210659, 0.021287376061081886, -0.04855392873287201, 0.0103003466501832, -0.008993818424642086, 0.05131004378199577, -0.0734843909740448, 0.017303360626101494, 0.008291462436318398, 0.046435531228780746, -0.055057018995285034, -0.05454597249627113, -0.009126733057200909, -0.0012434959644451737, -0.0846821740269661, -0.017736544832587242, -0.04779898375272751, 0.020568806678056717, -0.061118245124816895, -0.012131555937230587, 0.024907736107707024, -0.0161012914031744, -0.011221951805055141, -0.029136324301362038, 0.04336633160710335, -0.00514700124040246, 0.004810850135982037, 0.014044326730072498, -0.07381691038608551, -0.064864382147789, 0.041784100234508514, 0.06648915261030197, 0.038817185908555984, -0.03421948850154877, 0.019546108320355415, -0.00579161336645484, -0.06579872220754623, -0.01745537295937538, -0.07164284586906433, 0.032588109374046326, 0.009170422330498695, -0.08387100696563721, -0.04743993282318115, 0.05926872417330742, 0.03129392862319946, -0.012995549477636814, 0.007799868006259203, 0.036110181361436844, 0.01603531278669834, -0.09735894203186035, 0.014374110847711563, -0.023844046518206596], "LD_Fp":[0.09850919246673584, 0.022097617387771606, -0.02880568616092205, 0.014175659976899624, -0.03401500731706619, -0.010281442664563656, -0.05501694604754448, -0.041856300085783005, 0.07016798853874207, -0.022585496306419373, -0.007230871357023716, 
0.02143889106810093, 0.011802875436842442, -0.011940510012209415, 0.001225354615598917, -0.04420488327741623, 0.058923713862895966, 0.07726655155420303, -0.024950502440333366, -0.005545462481677532, 0.037338823080062866, -0.03718772903084755, 0.08340831100940704, 0.030300375074148178, -0.04332158342003822, -0.10117480903863907, -0.023774733766913414, 0.055412717163562775, 0.07188894599676132, 0.048699796199798584, 0.02051064558327198, -0.05177381634712219, 0.046848755329847336, 0.06421937793493271, 0.014812597073614597, 0.06599052250385284, 0.055128950625658035, 0.057206105440855026, 0.004570540506392717, 0.0006673894240520895, -0.04956628009676933, 0.018173960968852043, 0.009045585989952087, -0.09929032623767853, -0.0734606683254242, 0.009978558868169785, 0.016378602012991905, -0.0809779167175293, 0.028371425345540047, 0.07337132841348648, -0.0712965577840805, -0.07612331956624985, 0.023224541917443275, -0.01886812597513199, 0.049867402762174606, 0.04525093734264374, -0.04347287490963936, 0.04647829011082649, -0.020921878516674042, 0.055911704897880554, 0.0646883100271225, 0.043256886303424835, 0.012135359458625317, 0.06405725330114365, 0.04327752813696861, -0.06879010051488876, -0.02182726003229618, -0.030435195192694664, -0.04794333875179291, 0.03966866061091423, -0.05612926930189133, 0.061092350631952286, -0.047390542924404144, 0.06440525501966476, 0.07119303941726685, 0.036672186106443405, 0.039346762001514435, 0.05825766921043396, -0.05363740026950836, 0.026515239849686623, -0.021117106080055237, -0.061990927904844284, 0.06407181918621063, -0.02918284200131893, 0.06280291080474854, 0.05465791001915932, 0.025043612346053123, -0.015093226917088032, 0.0339696891605854, 0.039516378194093704, -0.005943501368165016, 0.037065502256155014, 0.0036617075093090534, -0.04032375290989876, -0.027956390753388405, -0.028206538408994675, 0.003602939657866955, 0.0015424611046910286, 0.03779160603880882, -0.012583530507981777], "LEA":[-0.07203060388565063, 
-0.017553633078932762, 0.0402604416012764, -0.03958871215581894, -0.035693515092134476, 0.006020952947437763, 0.06661038845777512, -0.05565638095140457, -0.07512512803077698, 0.015386131592094898, 0.1531272977590561, 0.07126382738351822, -0.018143991008400917, 0.0798688530921936, -0.0836813896894455, -0.005903773941099644, -0.03920849785208702, 0.025672506541013718, -0.017640162259340286, -0.09243063628673553, 0.0272371768951416, 0.04267166927456856, -0.032052017748355865, 0.06952647119760513, -0.03414658084511757, 0.05041181296110153, 0.04035321623086929, 0.04639449715614319, -0.000271787925157696, 0.1057962104678154, -0.031690120697021484, 0.0785541757941246, -0.008634688332676888, 0.035989925265312195, -0.00988205149769783, -0.047323428094387054, -0.018978994339704514, -0.001277003320865333, -0.022872451692819595, -0.034365635365247726, -0.04628191888332367, 0.06221615523099899, 0.01957613043487072, 0.13219280540943146, 0.03662179410457611, 0.046082716435194016, 0.011469600722193718, -0.025702660903334618, -0.08428508788347244, -0.07941769808530807, -0.06742636859416962, 0.0873873308300972, 0.0038614647928625345, 0.02177446149289608, -0.004519546404480934, -0.06213155761361122, -0.011228920891880989, -0.12034870684146881, 0.008946738205850124, 0.009164049290120602, -0.02258075587451458, 0.016061170026659966, 0.0645158663392067, 0.03723616153001785, -0.06451661139726639, -0.005219440441578627, -0.055180057883262634, 0.015841009095311165, -0.01621314138174057, -0.09887613356113434, 0.04894544556736946, -0.07996354252099991, 0.0138346366584301, -0.04036646708846092, -0.07073907554149628, -0.019294722005724907, -0.08181063830852509, -0.002301511587575078, -0.03429428115487099, 0.04098176211118698, 0.0706806555390358, 0.020024126395583153, 0.043529968708753586, 0.060017164796590805, 0.003525135340169072, -0.029752371832728386, -0.021769365295767784, 0.03941021487116814, -0.002250884659588337, -0.08078912645578384, 0.015297000296413898, 0.026888463646173477, 
0.048139896243810654, -0.04837239161133766, -0.036249756813049316, -0.027615496888756752, -0.15165935456752777, -0.03756902739405632, 0.015112340450286865, -0.0010633820202201605], @@ -89,13 +83,10 @@ "LXADD":[-0.11344388872385025, 0.08068472892045975, -0.041796449571847916, -0.043138183653354645, -0.049067553132772446, -0.005337296053767204, 0.021436110138893127, -0.035862036049366, -0.05354782193899155, 0.007918866351246834, -0.033625587821006775, 0.048349399119615555, 0.07167208194732666, -0.04589017853140831, -0.023661522194743156, 0.03580676391720772, 0.03326055034995079, 0.041535746306180954, -0.008772681467235088, -0.03362834453582764, -0.008885134011507034, -0.005286931060254574, -0.09389151632785797, 0.015108847059309483, -0.020455803722143173, 0.06477829068899155, 0.012845957651734352, -0.03201524540781975, -0.07100234925746918, 0.046879976987838745, -0.06030888110399246, 0.022502053529024124, -0.10942362248897552, -0.06978410482406616, 0.0714743509888649, 0.057766277343034744, 0.038102924823760986, -0.007761931978166103, -0.11331900954246521, -0.07498679310083389, -0.002573479898273945, -0.005142265930771828, -0.04596858471632004, -0.05356051027774811, 0.10633396357297897, -0.07426618784666061, 0.037482988089323044, 0.10527358204126358, 0.08239476382732391, 0.0678592249751091, -0.014271541498601437, -0.010673552751541138, -0.0767236202955246, 0.0329856239259243, -0.02222914807498455, -0.0019944666419178247, -0.0789676085114479, 0.006855306681245565, -0.012843947857618332, -0.10197136551141739, -0.036981865763664246, 0.04500154033303261, 0.0023044694680720568, -0.0031417198479175568, -0.06536462903022766, -0.02773689292371273, 0.06672050058841705, 0.046953968703746796, 0.009028433822095394, -0.008872197940945625, 0.09054717421531677, 0.009121377021074295, 0.09400534629821777, 0.012045130133628845, -0.014854185283184052, 0.030989984050393105, -0.030203191563487053, 0.09275887161493301, -0.009853487834334373, 0.038435857743024826, 0.05689401552081108, 
-0.06919367611408234, -0.02360834926366806, -0.08338318765163422, 0.01904873177409172, -0.027271559461951256, -0.05529508367180824, 0.09507890790700912, -0.03128642588853836, 0.026687508448958397, -0.05117009952664375, -0.03872146084904671, 0.08641110360622406, -0.027542488649487495, -0.09849996864795685, 0.05740527808666229, -0.02291804924607277, -0.10829142481088638, 0.008436905220150948, 0.027438905090093613], "MAXSDrr":[-0.06119297072291374, -0.04124095290899277, -0.0296846404671669, -0.045824289321899414, 0.02508155070245266, 0.007925539277493954, 0.043926920741796494, -0.03159729018807411, 0.019068658351898193, -0.013711963780224323, -0.028986897319555283, -0.04561398923397064, 0.04851536825299263, -0.03764308616518974, -0.018207892775535583, 0.016173269599676132, -0.004123492166399956, -0.025343073531985283, -0.09777097404003143, 0.0290510356426239, -0.06969164311885834, -0.06684337556362152, 0.04377250373363495, 0.06861237436532974, -0.046966683119535446, 0.0611143596470356, -0.044503044337034225, 0.023559842258691788, -0.029876690357923508, 0.011016200296580791, 0.07286348938941956, 0.00030023325234651566, 0.08359035104513168, 0.017708808183670044, 0.07800529897212982, -0.08712167292833328, 0.002862636698409915, -0.06735634058713913, 0.03052128478884697, 0.04226242005825043, 0.023851098492741585, -0.04562359303236008, -0.013745550066232681, 0.013936172239482403, -0.0647776871919632, -0.0487772636115551, 0.07015536725521088, -0.030445875599980354, -0.043143901973962784, -0.09556057304143906, 0.047779254615306854, 0.046041958034038544, 0.009388554841279984, 0.04671555384993553, -0.059331271797418594, 0.03360891714692116, 0.03569460287690163, 0.004674405790865421, 0.03280949592590332, -0.011293579824268818, -0.05531742051243782, 0.045912306755781174, 0.04241438955068588, -0.07023770362138748, -0.03889290615916252, 0.019566599279642105, 0.06292827427387238, -0.012180106714367867, -0.009482266381382942, 0.0033363515976816416, -0.028241898864507675, 
0.04916750639677048, -0.011430651880800724, 0.05025538429617882, 0.02134493552148342, 0.04370661824941635, 0.08801361173391342, -0.04115797579288483, -0.06421534717082977, -0.051845721900463104, -0.041304778307676315, 0.0507316067814827, 0.049301628023386, -0.013558737933635712, -0.004291698802262545, 0.038709867745637894, -0.0636303573846817, -0.047141704708337784, 0.022303685545921326, 0.07054309546947479, 0.009679436683654785, 0.0638614296913147, -0.046838339418172836, 0.01595005951821804, -0.025526082143187523, -0.0818924531340599, 0.016986405476927757, 0.023154381662607193, 0.06338698416948318, 0.07277237623929977], "MAXSSrr":[0.04370328411459923, 0.007435579318553209, 0.05632773041725159, 0.05872607231140137, -0.02179848775267601, -0.02491024136543274, -0.09028499573469162, -0.073136106133461, 0.0038046056870371103, -0.004702121019363403, 0.06376311928033829, 0.025374436751008034, 0.03343794494867325, -0.03841162100434303, 0.04050759971141815, 0.06359805166721344, -0.05459776520729065, -0.013898322358727455, 0.043059010058641434, 0.008913826197385788, -0.08469206839799881, 0.07041019201278687, -0.08591683208942413, 0.001833248999901116, 0.07940677553415298, -0.025694575160741806, -0.07197162508964539, -0.017312491312623024, -0.037606846541166306, -0.024861449375748634, 0.024707462638616562, -0.00026734761195257306, 0.033847302198410034, 0.05927937477827072, 0.04899705946445465, 0.0770091861486435, -0.09790053963661194, 0.057826053351163864, 0.05768071860074997, 0.01531772967427969, 0.0404951311647892, -0.04033346846699715, 0.05936214700341225, -0.029121382161974907, 0.044257547706365585, -0.10413498431444168, 0.09214437007904053, 0.017709942534565926, 0.026122651994228363, -0.08045665174722672, -0.03744427487254143, 0.09111800789833069, 0.0020880592055618763, 0.07745599746704102, 0.04109589755535126, -0.07718705385923386, -0.045550283044576645, 0.06791391223669052, 0.06261736899614334, -0.04795467481017113, 0.016496436670422554, 0.02853775955736637, 
-0.038986679166555405, 0.012603304348886013, 0.05299075320363045, 0.0022748250048607588, 0.00884503684937954, 0.1081618219614029, 0.05347983166575432, 0.03069908171892166, 0.015294212847948074, 0.0618034303188324, -0.07555301487445831, -0.0897526815533638, -0.07293840497732162, -0.02863491326570511, 0.01548877265304327, 0.09115951508283615, 0.011775748804211617, -0.009436656720936298, -0.07188120484352112, -0.004493236541748047, 0.0661926344037056, -0.04905804619193077, -0.06685564666986465, 0.06110713630914688, 0.018521195277571678, -0.04577818885445595, 0.07256703823804855, 0.0831693485379219, 0.008730655536055565, 0.04827301949262619, 0.0754026547074318, 0.027548737823963165, -0.07210569083690643, -0.004550515208393335, -0.06998797506093979, -0.014580612070858479, -0.04511459916830063, 0.1119980439543724], - "MBB":[0.0285621527582407, 0.017540860921144485, -0.08473232388496399, -0.004012782592326403, 0.01284435298293829, -0.05268647149205208, 0.05576688051223755, 0.0021535248961299658, -0.03945871442556381, -0.006189210340380669, -0.015129411593079567, -0.08998296409845352, -0.023543253540992737, -0.03973307088017464, 0.03474939242005348, -0.01602775789797306, -0.07461361587047577, -0.016514597460627556, -0.016366377472877502, 0.004728052299469709, -0.023341577500104904, -0.0914730429649353, 0.030636735260486603, -0.03425632417201996, 0.03614623472094536, -0.007019295822829008, -0.0218521635979414, -0.015808485448360443, -0.05414801836013794, 0.029721688479185104, 0.09407073259353638, 0.029655681923031807, -0.005722714588046074, 0.08653672784566879, 0.01633341796696186, -0.07890991121530533, -0.07574641704559326, 0.013483843766152859, -0.0011275253491476178, -0.05623066797852516, -0.03096684440970421, -0.0019136210903525352, 0.005127475131303072, 0.005057196598500013, -0.008401975966989994, -0.0391613207757473, -0.0026145142037421465, 0.05342942103743553, 0.034099776297807693, 0.028928104788064957, -0.006105952430516481, -0.039190810173749924, 
0.026784662157297134, -0.07679374516010284, -0.007475676946341991, -0.036650288850069046, 0.00774755235761404, 0.008984091691672802, -0.059830714017152786, 0.042310964316129684, 0.0681624785065651, -0.018189340829849243, -0.014816401526331902, -0.05541539564728737, -0.09348370134830475, 0.003691869555041194, -0.0010735570685938, -0.010131723247468472, -0.041050590574741364, -0.013792471028864384, -0.024337435141205788, 0.07526508718729019, 0.08163300901651382, -0.03508464992046356, -0.01681988686323166, -0.06734774261713028, -0.07656992971897125, -0.03866373747587204, 0.004544078838080168, 0.0585801787674427, -0.021823249757289886, -0.0610244981944561, -0.04469957575201988, -0.011089849285781384, -0.05069964751601219, -0.025694409385323524, -0.0670132040977478, 0.09616350382566452, 0.06308142840862274, -0.10543308407068253, 0.0023751568514853716, -0.06237253174185753, 0.05771911144256592, -0.06010056659579277, -0.016188565641641617, 0.009142348542809486, -0.014255198650062084, -0.02999819628894329, 0.00473234336823225, 0.03976761922240257], - "MCSymbol":[0.05158298835158348, 0.05024643987417221, 0.06704410910606384, 0.0378347709774971, -0.03902719169855118, -0.08626251667737961, 0.03964311257004738, 0.06615762412548065, 0.04361319541931152, 0.03646374121308327, -0.018487416207790375, 0.0024993624538183212, 0.006693041883409023, 0.08311881870031357, 0.021111667156219482, 0.038208797574043274, 0.08689694851636887, -0.03659898787736893, 0.020775076001882553, 0.03553535416722298, 0.06854367256164551, -0.002012243028730154, 0.03658154606819153, 0.03127564862370491, 0.0363621786236763, -0.027205800637602806, -0.05243372917175293, 0.012564878910779953, -0.013430594466626644, -0.04043225944042206, -0.025083716958761215, 0.09665156900882721, 0.005077417939901352, -0.05181048810482025, 0.08925056457519531, 0.0777667909860611, -0.013708796352148056, 0.07754126191139221, 0.08393577486276627, 0.06395212560892105, -0.07428556680679321, -0.052424050867557526, 0.03497577831149101, 
0.01964585855603218, -0.0429445318877697, 0.07072066515684128, 0.0017074055504053831, 0.059513408690690994, 0.013262910768389702, -0.07240563631057739, 0.09288764744997025, 0.030620144680142403, -0.046197980642318726, 0.04847298562526703, -0.03942957893013954, -0.0025783153250813484, -0.019526517018675804, 0.038867682218551636, 0.006007499527186155, -0.06366054713726044, 0.004640159662812948, 0.013837787322700024, -0.020015377551317215, -0.010317903012037277, 0.001741019543260336, 0.06261103600263596, -0.03374830260872841, 0.01629183441400528, -0.013137640431523323, 0.026046304032206535, -0.009679407812654972, -0.07085473090410233, 0.03035539574921131, -0.08764562010765076, -0.03820766881108284, -0.04181021824479103, -0.05163294076919556, 0.06666433811187744, -0.08939782530069351, 0.040260378271341324, -0.06847432255744934, 0.09106951206922531, -0.07388591021299362, -0.07479099184274673, -0.001779694459401071, -0.0963745042681694, -0.06515862792730331, -0.08404017239809036, -0.09935544431209564, 0.010541093535721302, -0.04491754248738289, 0.09378639608621597, 0.006655062548816204, 0.06637217849493027, -0.05623293295502663, -0.020134123042225838, 0.005873391404747963, -0.07765494287014008, -0.0008442706312052906, -0.03568055108189583], "MINSDrr":[0.00284420233219862, 0.07673676311969757, 0.08602232486009598, 0.030074521899223328, -0.06255929172039032, -0.10135219246149063, 0.0772649347782135, 0.0045582992024719715, -0.01195931900292635, 0.009085145778954029, -0.04665979743003845, 0.019213048741221428, 0.022454556077718735, -0.05505772680044174, 0.035268958657979965, -0.06431140005588531, -0.001450810581445694, -0.027346337214112282, 0.041191086173057556, -0.0808955729007721, -0.04748200997710228, 0.0653977245092392, 0.042980875819921494, -0.04332194849848747, -0.024661004543304443, 0.09317019581794739, -0.06639514118432999, 0.013383567333221436, 0.051771167665719986, 0.05815904587507248, -0.05226780101656914, 0.079694002866745, -0.017969269305467606, 
-0.07137028127908707, -0.0011493286583572626, -0.02009846828877926, 0.006549016106873751, 0.0019126685801893473, 0.06168307736515999, -0.025323089212179184, 0.010943768545985222, 0.02157585136592388, -0.012993190437555313, -0.025179127231240273, -0.08958654850721359, -0.04273540899157524, 0.015248515643179417, 0.05456075817346573, 0.05705633386969566, -0.0038763433694839478, 0.08008016645908356, -0.004114328417927027, -0.01975642889738083, -0.014040309935808182, 0.025527596473693848, -0.06883629411458969, 0.06273050606250763, 0.05779215693473816, -0.061573851853609085, 0.01889919489622116, 0.026195447891950607, -0.021544434130191803, -0.0810774490237236, -0.016286203637719154, 0.01799311302602291, -0.08440321683883667, 0.0897485539317131, 0.08083964139223099, -0.006629236973822117, 0.051063962280750275, -0.08597207814455032, 0.029692046344280243, -0.03309508413076401, -0.09422174096107483, 0.0019163102842867374, 0.05546015128493309, -0.05980079993605614, -0.07416199892759323, -0.005134278908371925, 0.07392455637454987, -0.0634748563170433, 0.020546387881040573, -0.019978882744908333, 0.039572179317474365, -0.04754075035452843, -0.06090293824672699, -0.011185224168002605, -0.054661743342876434, 0.027916360646486282, -0.00819246843457222, -0.03119322657585144, 0.019949961453676224, 0.008312772959470749, 0.06788603216409683, 0.041624777019023895, 0.051687415689229965, -0.04819793254137039, -0.0761520192027092, -0.019374510273337364, -0.008435340598225594], "MINSSrr":[-0.06906168162822723, 0.008121289312839508, 0.010413543321192265, 0.052863992750644684, 0.01030051801353693, -0.009280139580368996, 0.016139337792992592, -0.05126945674419403, 0.06733083724975586, -0.01006366591900587, 0.06506948918104172, 0.05012301355600357, -0.07191506028175354, 0.018038516864180565, -0.020798280835151672, 0.08538958430290222, -0.028427604585886, 0.02630189247429371, 0.010489841923117638, 0.10011959075927734, -0.067482590675354, 0.01461686473339796, 0.03908747434616089, 
-0.015383233316242695, -0.03783239424228668, 0.06359098851680756, -0.052475571632385254, 0.07818790525197983, -0.0030931381043046713, 0.013684416189789772, 0.04222726821899414, 0.04708671569824219, 0.01192860770970583, 0.08628913760185242, -0.06380248814821243, -0.004006511997431517, -0.02817981317639351, -0.11196613311767578, 0.01953534409403801, 0.0034300305414944887, -0.040240559726953506, 0.004963779356330633, -0.06623393297195435, -0.04386508837342262, -0.08431598544120789, -0.023293999955058098, 0.02133636176586151, 0.04054516181349754, 0.04479363188147545, 0.02776535600423813, 0.01497643906623125, 0.026148531585931778, -0.05869835242629051, -0.07451415807008743, -0.009552933275699615, -0.004124804865568876, 0.08342882245779037, 0.05295371264219284, -0.05495591461658478, -0.07350015640258789, -0.05573306977748871, 0.07158630341291428, 0.04162517935037613, 0.0019162269309163094, -0.07742705941200256, -0.05673951655626297, 0.05760834366083145, 0.08143799751996994, 0.09629082679748535, -0.05737840384244919, 0.03762679174542427, 0.022383252158761024, 0.02897579036653042, -0.0929567888379097, 0.04767351970076561, 0.05145186930894852, 0.012956425547599792, 0.04237693175673485, 0.06772835552692413, 0.011290902271866798, -0.06324069201946259, -0.04689439386129379, 0.09521757066249847, 0.05625065788626671, -0.032533977180719376, -0.00987032800912857, -0.08346299827098846, -0.06292857229709625, 0.042861636728048325, 0.08865208923816681, -0.0021774298511445522, 0.010668188333511353, -0.05791740491986275, 0.02240762859582901, -0.022414017468690872, 0.04343479871749878, 0.01852354407310486, -0.004329795949161053, -0.00262851663865149, -0.009029376320540905], "MOV":[-0.03924819082021713, -0.015029003843665123, 0.14121688902378082, -0.05414531007409096, -0.01409768033772707, 0.05467522144317627, -0.0798286497592926, 0.042834796011447906, -0.04328306391835213, -0.12638653814792633, 0.02380293421447277, -0.010002975352108479, -0.03018246777355671, -0.09843093156814575, 
-0.015159506350755692, -0.03186051547527313, -0.009830419905483723, 0.024049948900938034, -0.028536750003695488, -0.05252794921398163, -0.003984724637120962, -0.09075328707695007, -0.015937313437461853, 0.07316069304943085, 0.002778300317004323, 0.003214895725250244, -0.0832214206457138, 0.012602301314473152, 0.0687694102525711, 0.1425037384033203, -0.04724106192588806, 0.05618143081665039, 0.0028424363117665052, 0.03067261539399624, 0.008477674797177315, -0.002142940880730748, 0.0036045191809535027, -0.02257452718913555, 0.013552851043641567, -0.016065331175923347, 0.03364546224474907, 0.0027604023925960064, -0.013575572520494461, 0.1340155154466629, 0.04859570413827896, 0.07984673976898193, 0.006813493091613054, -0.017625009641051292, -0.0667564794421196, -0.0025298972614109516, -0.06280945241451263, 0.08589767664670944, -0.011751428246498108, 0.04074618220329285, 0.0561428964138031, -0.0068444423377513885, 0.028041694313287735, 0.06258948892354965, 0.02493610419332981, -0.018480388447642326, -0.035079196095466614, 0.14365622401237488, -0.046609606593847275, 0.040164150297641754, -0.049927353858947754, 0.06781942397356033, -0.04828719049692154, 0.03496144339442253, -0.044686879962682724, 0.04254060238599777, 0.024320241063833237, -0.0031205937266349792, -0.049061503261327744, -0.028716804459691048, -0.056192029267549515, 0.022012677043676376, -0.0745186060667038, -0.0008951064082793891, -0.051033493131399155, 0.023357892408967018, 0.06984421610832214, 0.0057564410381019115, -0.005192344542592764, -0.003961252048611641, -0.012275456450879574, -0.018581852316856384, -0.0046620736829936504, 0.02494811825454235, 0.0520334355533123, -0.02435225434601307, 0.0008846594137139618, 0.017687007784843445, 0.07866063714027405, -0.025595100596547127, -0.020679078996181488, -0.027750879526138306, 0.10005537420511246, -0.015581297688186169, -0.08011393249034882, 0.028118811547756195], "MUL":[-0.026987887918949127, 0.06016572564840317, 0.0787728950381279, -0.0803905576467514, 
0.005736608523875475, -0.07245960086584091, -0.02662983350455761, 0.012340782210230827, 0.042490337044000626, 0.06399581581354141, -0.009004191495478153, 0.0370473749935627, -0.0605553574860096, -0.09520823508501053, 0.0010566662531346083, -0.028270091861486435, 0.08631408214569092, 0.002891023177653551, -0.051674507558345795, -0.04089691862463951, -0.04444378614425659, -0.061945777386426926, -0.026001833379268646, 0.04689744487404823, -0.07711070775985718, 0.07018855959177017, -0.02606336772441864, 0.054914504289627075, 0.03522270917892456, -0.027317974716424942, 0.02187947928905487, 0.009710998274385929, 0.01340037677437067, 0.016422593966126442, -0.058249425143003464, -0.08377814292907715, -0.04476138949394226, 0.04349169507622719, 0.05062006786465645, 0.01706511154770851, 0.020649245008826256, 0.06287672370672226, -0.03981941193342209, 0.04973218962550163, -0.03353424742817879, -0.016799092292785645, -0.031751759350299835, 0.10430201143026352, -0.04326871410012245, 0.0736854076385498, -0.0768580436706543, -0.03183818608522415, 0.010583195835351944, 0.015541432425379753, 0.03191666305065155, 0.020011236891150475, 0.041239380836486816, -0.0029152908828109503, 0.009499716572463512, -0.011166329495608807, 0.03469998389482498, 0.00607832008972764, 0.030300112441182137, -0.040855471044778824, 0.00988304428756237, 0.050531189888715744, 0.06647889316082001, -0.027519647032022476, -0.06819992512464523, 0.02215251699090004, 0.086424820125103, -0.03395787626504898, -0.020825445652008057, 0.08309803158044815, -0.0256529338657856, 0.005000723991543055, -0.03375622257590294, 0.005569287110120058, -0.028089171275496483, 0.04142652079463005, -0.03232670575380325, 0.025872791185975075, -0.07439207285642624, 0.04975134879350662, 0.049770113080739975, -0.05090470612049103, -0.04476647078990936, 0.09217675030231476, 0.05079415813088417, 0.017867455258965492, -0.04477125406265259, 0.004301204811781645, 0.05066722631454468, -0.08186711370944977, 0.008772231638431549, 
-0.10532139241695404, 0.004499110858887434, 0.03296274691820145, -0.0020684772171080112, 0.05012065917253494], - "Metadata":[-0.07879140228033066, 0.024690961465239525, 0.022790303453803062, 0.01354144886136055, -0.07098772376775742, 0.04053819552063942, -0.04038544371724129, -0.021055836230516434, 0.10361373424530029, 0.04415135458111763, -0.09545262902975082, 0.042553599923849106, -0.021835647523403168, 0.07703430950641632, -0.04880501329898834, -0.04054124280810356, 0.05049756169319153, 0.08986796438694, 0.0705084353685379, -0.0077315340749919415, -0.045390889048576355, 0.053155045956373215, 0.045656319707632065, -0.02663712576031685, -0.01446426473557949, -0.058978915214538574, 0.011314704082906246, 0.03043927252292633, -0.0843580812215805, 0.017854437232017517, -0.08720997720956802, 0.030351335182785988, -0.04896129295229912, 0.04189978539943695, -0.09887325763702393, 0.0015409664483740926, -0.08604399859905243, 0.10654544085264206, 0.1058540865778923, 0.014106648042798042, 0.0640459656715393, -0.05182884633541107, 0.006081609521061182, 0.07624028623104095, 0.02025698497891426, 0.08467324078083038, 0.027136018499732018, 0.026320911943912506, -0.035337720066308975, 0.03864980861544609, -0.019960917532444, -0.029152821749448776, 0.06562864780426025, 0.028298277407884598, -0.07397148013114929, -0.005078969523310661, 0.025909438729286194, -0.01157586183398962, 0.05436081811785698, 0.03408071771264076, -0.07142144441604614, -0.0523630827665329, -0.06302442401647568, -0.019975490868091583, -0.06937523931264877, 0.057667043060064316, -0.08580337464809418, -0.05092239752411842, -0.012613813392817974, 0.025480754673480988, 0.04219530522823334, -0.007300581783056259, 0.05323299020528793, 0.0489904023706913, 0.09260626882314682, -0.04819458723068237, 0.05419271066784859, 0.04558999091386795, 0.012036344967782497, -0.05483977124094963, -0.05181310698390007, -0.02104383148252964, -0.057876624166965485, 0.039601441472768784, 0.025240536779165268, -0.03984035924077034, 
0.07654847204685211, -0.07073183357715607, -0.0018080074805766344, -0.016453349962830544, 0.03962434455752373, 0.05717255175113678, 0.01962372660636902, 0.00952839944511652, 0.0013127806596457958, 0.013634574599564075, 0.07692103832960129, 0.06334574520587921, 0.056647684425115585, -0.02965259924530983], "NEG":[-0.0585959330201149, -0.02519698068499565, 0.029133861884474754, -0.003332944354042411, 0.05054186284542084, -0.03572014719247818, -0.012210451066493988, 0.06708117574453354, -0.0712793841958046, -0.01644597202539444, 0.06453811377286911, -0.03662518784403801, 0.0545802004635334, -0.11130833625793457, -0.04544609412550926, 0.012950814329087734, 0.08011337369680405, 0.014672964811325073, 0.0030391360633075237, -0.10994786024093628, 0.004102041013538837, -0.0749390497803688, -0.010000540874898434, 0.062072113156318665, 0.03312767669558525, -0.04764379560947418, -0.033307697623968124, 0.02903047949075699, 0.0319744311273098, 0.027374137192964554, -0.05640692263841629, -0.01572772115468979, 0.019634589552879333, 0.0629790723323822, -0.024743184447288513, -0.09348101913928986, 0.04078087955713272, 0.0002063393039861694, 0.01791796088218689, -0.01174850668758154, 0.0067609078250825405, 0.031922854483127594, 0.045338794589042664, -0.06706424057483673, -0.03090975433588028, 0.035511564463377, 0.0377444289624691, -0.007464382331818342, 0.02387971244752407, -0.023001981899142265, -0.0052301278337836266, 0.08532170951366425, 0.00384823651984334, 0.0689602717757225, -0.05606595426797867, 0.03483026847243309, 0.023350417613983154, -0.06512849777936935, 0.0627395287156105, -0.0203714482486248, -0.009735504165291786, 0.06432165950536728, -0.04546240717172623, 0.0322086475789547, 0.004561635199934244, 0.040702879428863525, -0.0680280476808548, 0.025354159995913506, -0.07624178379774094, 0.06776861846446991, 0.07863514125347137, -0.037652503699064255, -0.023264721035957336, 0.030604641884565353, -0.07419195026159286, 0.014679630286991596, 0.1294829547405243, 
0.007591600529849529, -0.06612348556518555, 0.03127516806125641, 0.10645392537117004, -0.018773522228002548, 0.03992835432291031, 0.044048961251974106, 0.00023814172891434282, -0.06797933578491211, -0.08000202476978302, -0.04320430010557175, 0.043590281158685684, -0.05034546181559563, 0.014501169323921204, 0.03329288214445114, 0.03045976720750332, -0.01932660862803459, -0.026188183575868607, -0.1232738122344017, -0.04858024790883064, -0.015570580027997494, 0.013346930965781212, 0.009410912171006203], "NOT":[0.02556992694735527, -0.0005189123330637813, 0.010195978917181492, -0.027382172644138336, -0.0374554842710495, 0.08793098479509354, 0.0024311996530741453, -0.08769379556179047, -0.054654307663440704, -0.08747632801532745, 0.09218847006559372, 0.0972878560423851, 0.044738128781318665, -0.02398994378745556, -0.046165600419044495, -0.0002692296984605491, -0.03797682002186775, 0.05161413550376892, -0.033769138157367706, 0.011279402300715446, 0.08941229432821274, -0.07437314093112946, -0.025249861180782318, 0.1026485413312912, -0.042062994092702866, 0.022835882380604744, 0.05108749121427536, -0.054616689682006836, -0.04208545386791229, 0.10205414891242981, -0.02474227361381054, -0.01605238951742649, -0.011079655028879642, -0.04231556877493858, -0.058844879269599915, 0.0017704797210171819, 0.005396600812673569, -0.058835554867982864, 0.03384264558553696, -0.024245088919997215, 0.03355555981397629, 0.02017929218709469, 0.04421762749552727, 0.09027500450611115, 0.03916880115866661, 0.042518291622400284, 0.024490609765052795, 0.00026937652728520334, -0.010342003777623177, -0.05488119646906853, 0.07418034970760345, 0.0008032438345253468, 0.09190968424081802, 0.07747997343540192, -0.024773627519607544, 0.0496656633913517, -0.038326963782310486, -0.0022213482297956944, 0.02448110282421112, 0.0022990668658167124, 0.052763812243938446, 0.051123637706041336, 0.03795074671506882, 0.06734737008810043, -0.030445149168372154, 0.021410485729575157, -0.044919464737176895, 
-0.0011586989276111126, -0.0903671532869339, -0.01408425159752369, 0.07342954725027084, -0.04118982329964638, -0.008432484231889248, -0.0008165669860318303, -0.0642886608839035, 0.007230957038700581, -0.0670868456363678, -0.01116579957306385, -0.09545603394508362, -0.03109285980463028, 0.005951744969934225, 0.024672016501426697, -0.04027184471487999, 0.03607063740491867, 0.023179687559604645, 0.0117312828078866, -0.019768331199884415, -0.023262612521648407, 0.04165903106331825, -0.039224691689014435, 0.040571704506874084, 0.08653629571199417, 0.027772698551416397, -0.08196783810853958, -0.013821743428707123, 0.004212009254842997, 0.01664070598781109, -0.008459849283099174, 0.041462354362010956, 0.06886350363492966], "OR":[-0.0010318798013031483, -0.058885037899017334, 0.015562368556857109, -0.03459857404232025, -0.006239954382181168, 0.04347813501954079, -0.043183062225580215, -0.06115246191620827, -0.08097145706415176, -0.040188197046518326, 0.02098822593688965, -0.013338722288608551, -0.01845080405473709, -0.07172099500894547, -0.00026761949993669987, 0.015059647150337696, -0.08275016397237778, 0.10280061513185501, -0.017712965607643127, -0.07511771470308304, 0.007648291997611523, -0.12827979028224945, -0.020353827625513077, 0.08809063583612442, -0.02829514630138874, 0.003038457129150629, -0.04399721696972847, 0.046383049339056015, 0.06416497379541397, -0.0006932668038643897, -0.033501505851745605, -0.012374987825751305, 0.018504725769162178, 0.00529597420245409, -0.040804456919431686, -0.00419827364385128, -0.017476536333560944, -0.04530858248472214, 0.01608600653707981, -0.08898036181926727, -0.015132613480091095, -0.053797122091054916, -0.011825251393020153, 0.09507828205823898, 0.08454664051532745, 0.04075947031378746, 0.020354142412543297, 0.01704799383878708, -0.026439497247338295, -0.04004717990756035, -0.053405825048685074, 0.04079057276248932, 0.026150185614824295, 0.04538597911596298, 0.046778932213783264, 0.057205770164728165, 0.037173718214035034, 
-0.07114585489034653, 0.03480122983455658, 0.0069038826040923595, -0.056386105716228485, -0.03294815868139267, 0.04636325314640999, -0.05767818167805672, -0.05788124352693558, -0.011048000305891037, -0.04350278526544571, 0.029680529609322548, -0.0512658953666687, 0.04321866109967232, 0.047014784067869186, -0.014913392253220081, -0.007425297982990742, -0.09810416400432587, -0.07316632568836212, 0.05063875392079353, -0.07298189401626587, -0.012434680946171284, -0.09386061877012253, 0.016765601933002472, 0.06658460199832916, 0.0014198448043316603, -0.022241152822971344, 0.05902376398444176, 0.057584285736083984, 0.024565961211919785, -0.02896188013255596, 0.006485136691480875, 0.05981580168008804, -0.015995489433407784, 0.027470067143440247, 0.09679803997278214, 0.0342426523566246, -0.08387557417154312, -0.015599220991134644, -0.0049544889479875565, -0.06524655222892761, 0.02150602824985981, 0.016511479392647743, 0.055177561938762665], @@ -115,7 +106,6 @@ "PCMPGTBrr":[-0.04665364325046539, -0.03588206693530083, 0.05219453573226929, 0.08376432955265045, 0.05562759190797806, -0.0034289404284209013, 0.08200010657310486, 0.023898538202047348, -0.002851601457223296, -0.08778133243322372, 0.017107484862208366, 0.08448091894388199, 0.020043527707457542, 0.038858626037836075, 0.036468397825956345, -0.0069902255199849606, -0.09442859143018723, 0.0018075992120429873, 0.05577728524804115, -0.0005804274696856737, 0.029588190838694572, -0.050955869257450104, 0.016604335978627205, -0.054141607135534286, -0.030936168506741524, 0.004688458051532507, -0.02321118488907814, -0.009524177759885788, 0.030161075294017792, -0.0557246096432209, 0.017830688506364822, 0.04058525711297989, 0.023080267012119293, 0.04536818340420723, 0.09658516198396683, 0.004083207808434963, 0.053284309804439545, 0.07114734500646591, 0.03272407501935959, -0.06646303087472916, 0.08200454711914062, -0.06558514386415482, 0.0745493471622467, -0.0010506648104637861, -0.02250707894563675, 0.015057512558996677, 
-5.047186277806759e-06, 0.04663649946451187, 0.06489380449056625, -0.0477377213537693, -0.08882559835910797, 0.08948437124490738, -0.052260447293519974, 0.06798093020915985, -0.06404604762792587, 0.0005905702710151672, 0.014312930405139923, 0.0370929092168808, 0.03622571751475334, 0.06601805984973907, 0.04077596217393875, -0.0019877473823726177, -0.02357509359717369, 0.04524341970682144, 0.024309739470481873, -0.05969798564910889, -0.015872884541749954, -0.055400021374225616, 0.04820183292031288, 0.024034500122070312, -0.05125486105680466, 0.020366262644529343, 0.03310052305459976, 0.1036759540438652, 0.049202825874090195, -0.010945710353553295, -0.030628688633441925, 0.048871662467718124, 0.07457619905471802, 0.017111260443925858, 0.028184816241264343, -0.09065181016921997, -0.017116032540798187, -0.06233282387256622, -0.011385255493223667, -0.06190027296543121, -0.01189250499010086, -0.03632708638906479, 0.04705822467803955, 0.0022293981164693832, 0.06782552599906921, -0.0490303635597229, -0.08690774440765381, -0.08311695605516434, 0.04079030826687813, 0.022971853613853455, -0.019726071506738663, -0.032829709351062775, -0.05147984251379967, -0.06768873333930969], "PCMPGTDrr":[0.026095403358340263, 0.009877854026854038, -0.022390423342585564, -0.06749505549669266, 0.03866114094853401, 0.07523459941148758, -0.02331429533660412, -0.013958744704723358, -0.05151516944169998, -0.033018071204423904, -0.017118683084845543, 0.06611985713243484, 0.024562569335103035, 0.027193237096071243, -0.04081164300441742, -0.0557839497923851, 0.07676059752702713, -0.017435213550925255, -0.0696197971701622, 0.04529204219579697, 0.015718640759587288, -0.0868423655629158, -0.025476763024926186, 0.1075882539153099, 0.08407340198755264, 0.03219793736934662, -0.029079284518957138, -0.10067792236804962, -0.01665782555937767, -0.002518820110708475, 0.06302576512098312, -0.042360853403806686, -0.014688530936837196, -0.04797102138400078, -0.05708448588848114, 0.05345156416296959, 
-0.03360274061560631, -0.006362707354128361, 0.045909661799669266, -0.0034944594372063875, -0.04771789163351059, -0.015326191671192646, -0.017800530418753624, 0.009678518399596214, -0.01412744726985693, 0.09620117396116257, 0.0705861821770668, -0.0663042888045311, 0.07589521259069443, -0.08846025168895721, 0.008178732357919216, -0.023293234407901764, 0.049390021711587906, 0.00771696399897337, -0.026583032682538033, 0.012981866486370564, -0.06098538264632225, -0.04784953594207764, -0.001411060569807887, -0.0646580159664154, 0.07771933078765869, 0.012061100453138351, 0.026251494884490967, 0.024035189300775528, 0.00368816708214581, 0.019370727241039276, 0.0473535880446434, 0.0688827782869339, -0.0656280517578125, 0.0001225982268806547, -0.04765431582927704, 0.08570858836174011, 0.06544618308544159, 0.02309294231235981, -0.07891835272312164, 0.05969972908496857, 0.04259306937456131, -0.0388357900083065, 0.10700955986976624, -0.03643207252025604, -0.014097973704338074, 0.018475063145160675, -0.008959461003541946, -0.04132810980081558, -0.01586003415286541, -0.013873838819563389, 0.07354859262704849, 0.003967848140746355, -0.023853322491049767, -0.013099947944283485, 0.06407736241817474, 0.03060499019920826, -0.08859552443027496, 0.009045977145433426, -0.09939071536064148, -0.022137949243187904, -0.03951180726289749, -0.0316530205309391, 0.05501912534236908, -0.06330689787864685], "PEXTRWrr":[-0.05698293820023537, -0.02332535944879055, -0.01313185878098011, 0.08844685554504395, -0.030702419579029083, -0.042257267981767654, -0.06976033002138138, 0.08907881379127502, 0.040486857295036316, 0.01966431364417076, 0.011261478997766972, 0.011022844351828098, -0.0069642444141209126, -0.016230706125497818, -0.009695738554000854, -0.04666578397154808, 0.016855308786034584, -0.03308985382318497, 0.01504850760102272, 0.09940154105424881, -0.07109691947698593, 0.043378811329603195, -0.06964893639087677, -0.05999808758497238, 0.008651218377053738, 0.04237857088446617, 
0.04557272046804428, 0.04033806174993515, -0.005760873202234507, 0.008976156823337078, 0.05276636406779289, -0.06584233790636063, -0.011512805707752705, -0.01598522625863552, -0.044132646173238754, -0.020889364182949066, -0.09435509145259857, -0.02823605202138424, 0.0820322185754776, -0.0391690619289875, 0.03367430716753006, -0.029474111273884773, -0.07719384133815765, 0.003098628716543317, 0.05822441354393959, -0.09175454080104828, 0.02256210707128048, -0.004901964217424393, -0.008566503413021564, 0.040359016507864, -0.04049991816282272, 0.010366388596594334, -0.05293237417936325, -0.0956558957695961, -0.01418458204716444, 0.05464276298880577, -0.014091472141444683, 0.023551519960165024, -0.042662639170885086, -0.07025191932916641, -0.0017952515045180917, 0.07680258899927139, -0.10743812471628189, -0.08435508608818054, -0.00337960640899837, -0.03381747379899025, 0.027066459879279137, -0.009784750640392303, 0.04265652969479561, 0.02066781371831894, -0.03692338988184929, 0.0029027678538113832, 0.06893923878669739, 0.03784753382205963, -0.04037536308169365, -0.09532847255468369, 0.03193795308470726, 0.0387917198240757, 0.03887058049440384, -0.0002478501701261848, -0.0671166405081749, 0.06754262745380402, 0.01643708348274231, 0.012460017576813698, 0.03147564455866814, 0.05646798014640808, 0.014081758446991444, 0.07141963392496109, 0.016428180038928986, 0.0443485863506794, 0.06492826342582703, 0.09964785724878311, 0.026795320212841034, 0.0271765124052763, -0.015695465728640556, -0.08133535832166672, -0.05439477041363716, 0.04913243651390076, 0.024485180154442787, -0.04072758927941322], - "PHY_REG":[-0.008169060572981834, -0.017023155465722084, -0.04927198588848114, 0.0014261528849601746, 0.012259463779628277, -0.02794509381055832, -0.024857040494680405, 0.029203711077570915, 0.0433109886944294, 0.009679347276687622, -0.05811547115445137, -0.09075025469064713, -0.08525611460208893, -0.10545054078102112, 0.06474080681800842, 0.056396666914224625, 0.06781823933124542, 
0.09059076011180878, -0.10420752316713333, -0.08284831047058105, 0.02349182404577732, -0.0354253351688385, -0.004627702757716179, 0.0068538435734808445, -0.053724177181720734, -0.02113335393369198, 0.05254676192998886, -0.050769440829753876, 0.061386119574308395, -0.07541731745004654, -0.024204161018133163, -0.0009893826209008694, -0.007493770215660334, -0.017051052302122116, 0.015025814063847065, -0.020427946001291275, -0.0844966471195221, 0.04589429497718811, 0.025571472942829132, -0.05280151963233948, 0.06895384937524796, 0.03960262984037399, 0.0068003153428435326, 0.09397424012422562, -0.0523529127240181, 0.03780638054013252, -0.015423302538692951, 0.029167350381612778, 0.01019437238574028, 0.023989612236618996, -0.03344425559043884, -0.07926471531391144, -0.09238854795694351, 0.04794330149888992, 0.01872367039322853, -0.029179377481341362, -0.05339968949556351, -0.04575541242957115, -0.004491546656936407, -0.009650425054132938, 0.026945313438773155, -0.02115861512720585, 0.06488905847072601, -0.06647083908319473, 0.008904196321964264, 0.010536684654653072, -0.06012551859021187, -0.00022655133216176182, -0.10175421833992004, 0.062001921236515045, -0.054452817887067795, 0.01785552129149437, -0.06749527156352997, -0.04883178323507309, -0.023449009284377098, 0.040745027363300323, 0.002448269398882985, 0.07842953503131866, -0.019806355237960815, -0.08275315910577774, 0.01131721492856741, 0.0482926219701767, 0.01892486959695816, 0.005685009527951479, -0.0055344682186841965, -0.0034555341117084026, -0.07923021167516708, 0.06387833505868912, 0.05978211387991905, -0.001252106623724103, 0.07216084003448486, -0.01223798282444477, 0.09716741740703583, 0.009659498929977417, -0.09404221922159195, -0.10122949630022049, -0.003581057768315077, 0.07885389029979706, 0.05305042862892151, -0.04988719895482063], "PMOVMSKBrr":[0.07294902205467224, -0.00040799094131216407, -0.01483855675905943, -0.02571418508887291, 0.08466307818889618, -0.03447218984365463, -0.05685977265238762, 
-0.019133185967803, 0.06332023441791534, -0.061352625489234924, -0.023195402696728706, -0.05378473922610283, -0.05650350823998451, 0.06583224982023239, -0.012845925986766815, -0.052972156554460526, 0.049470845609903336, -0.04565730318427086, 0.09717552363872528, -0.014171762391924858, 0.013508875854313374, 0.004057068843394518, -0.020556267350912094, -0.10475417971611023, 0.018426941707730293, -0.07273723930120468, 0.01702595315873623, -0.013097747229039669, -0.07530277967453003, 0.05442536994814873, -0.0601920410990715, -0.05255919322371483, -0.07305102050304413, 0.02758030779659748, 0.06180129200220108, 0.10606050491333008, 0.046477098017930984, -0.024062691256403923, 0.07360008358955383, -0.011283098720014095, -0.03712400794029236, -0.09973011910915375, 0.018314119428396225, 0.009135990403592587, -0.01891133189201355, 0.00915572326630354, 0.006080301944166422, -0.02368554100394249, -0.019582828506827354, 0.051494162529706955, -0.010953089222311974, 0.011621126905083656, 0.010515356436371803, 0.011188569478690624, -0.0202876515686512, 0.038686931133270264, -0.066365085542202, 0.014182188548147678, 0.00445093447342515, 0.05712618678808212, -0.04463819041848183, -0.10292281210422516, -0.011173201724886894, 0.0029098563827574253, 0.06890314072370529, 0.06398330628871918, 0.03248615562915802, -0.05457807704806328, -0.006898659747093916, 0.038892313838005066, -0.09130232781171799, 0.013324378058314323, -0.033766016364097595, -0.043404608964920044, 0.018701359629631042, -0.03784232959151268, -0.05014420300722122, 0.04404780641198158, 0.09254389256238937, 0.09839074313640594, -0.028214668855071068, 0.03262662887573242, 0.04281335324048996, 0.07356158643960953, -0.0773080587387085, 0.026536725461483, -0.06819723546504974, 0.03335537016391754, 0.09355103969573975, -0.052649617195129395, -0.08467497676610947, -0.06516479700803757, -0.07499512284994125, 0.023276200518012047, -0.06063856557011604, -0.044472258538007736, 0.03155883774161339, -0.011262890882790089, 
0.04045895114541054, 0.012343645095825195], "PMULUDQrr":[-0.018331514671444893, 0.04249238595366478, 0.0718526765704155, 0.03221653401851654, -0.04829120263457298, -0.02055567130446434, 0.05200991779565811, -0.04337913170456886, -0.02698952704668045, 0.05037892237305641, 0.014545431360602379, 0.09035851061344147, 0.0777752548456192, -0.06762461364269257, 0.032133519649505615, 0.048851024359464645, 0.01295433659106493, 0.054136257618665695, 0.09599477052688599, 0.024489495903253555, 0.05683024227619171, -0.05242127552628517, -0.043476004153490067, 0.004586773458868265, 0.024281315505504608, 0.03402777388691902, 0.0033939755521714687, 0.049474406987428665, 0.0011405921541154385, 0.06828528642654419, 0.08426304161548615, -0.029339993372559547, -0.04173621907830238, -0.03966334089636803, -0.03011258877813816, -0.07684683799743652, 0.040944185107946396, -0.04709877818822861, 0.07968004047870636, 0.07534269988536835, -0.006957313045859337, -0.0016522067598998547, -0.017229178920388222, 0.030470186844468117, 0.05390452966094017, 0.05233803763985634, 0.045554302632808685, -0.03710555285215378, 0.05699322372674942, 0.019888387992978096, 0.10152119398117065, 0.026563912630081177, -0.0018862299621105194, -0.02453959546983242, -0.06107368320226669, -0.04910692200064659, -0.06316373497247696, 0.04648333042860031, -0.00939352996647358, 0.030374331399798393, 0.0027768383733928204, 0.07302171736955643, -0.0035402378998696804, 0.054474033415317535, -0.0739617869257927, 0.01190911140292883, -0.019428657367825508, -0.006644500885158777, -0.04998863860964775, 0.03215506672859192, 0.054085105657577515, 0.047874726355075836, 0.10735851526260376, 0.030255280435085297, 0.029996531084179878, 0.006218941882252693, 0.04892734810709953, 0.06425125896930695, -0.017792150378227234, 0.041398752480745316, -0.017293022945523262, -0.011015499010682106, -0.02933122031390667, -0.005825115367770195, -0.07212502509355545, 0.10469445586204529, 0.009840304031968117, 0.026172513142228127, 
0.002459621522575617, -0.02771947532892227, -0.006639100145548582, -0.04062161594629288, -0.0746249407529831, 0.04523816704750061, -0.07439430058002472, 0.06977812945842743, 0.008738852106034756, 0.06937781721353531, 0.07391723990440369, -0.09542208909988403], "POPCNT":[0.032459065318107605, 0.11127372831106186, -0.004006756469607353, 0.06373029947280884, 0.07161973416805267, -0.07966824620962143, -0.014274416491389275, 0.02168503776192665, -0.060636017471551895, -0.051414258778095245, 0.003268218832090497, 0.05552225932478905, 0.01940925046801567, -0.05398592725396156, 0.09021458029747009, -0.060922130942344666, -0.0407782681286335, -0.027882883325219154, 0.012706448324024677, -0.02730434015393257, 0.05854162946343422, -0.0798129290342331, -0.00179530237801373, 0.04958317428827286, -0.04621487483382225, 0.0524308979511261, -0.03889109939336777, 0.07240460813045502, 0.06366933137178421, 0.029314585030078888, -0.014743340201675892, -0.021233027800917625, 0.06803205609321594, -0.01269250176846981, -0.033408213406801224, 0.09638478606939316, -0.02009841799736023, -0.014619074761867523, 0.022498659789562225, 0.006679723970592022, -0.016163295134902, 0.09717728197574615, -0.010882971808314323, -0.09489153325557709, 0.046623144298791885, -0.04596618935465813, -0.026864662766456604, 0.01605546846985817, 0.05979238823056221, -0.024411896243691444, 0.039511535316705704, -0.0108433086425066, -0.05629622936248779, 0.02339898608624935, -0.025785285979509354, 0.011886742897331715, 0.08834438771009445, -0.08506806194782257, 0.021776534616947174, 0.01446699257940054, -0.009117010980844498, -0.022380229085683823, -0.0541100800037384, -0.040569182485342026, -0.02888612262904644, 0.07774273306131363, -0.052350424230098724, -0.039240963757038116, 0.004771160893142223, 0.014987779781222343, -0.05511622130870819, 0.019763313233852386, -0.0920683741569519, 0.021821241825819016, 0.10812623798847198, -0.06422155350446701, -0.07388156652450562, 0.00949418731033802, -0.06905169039964676, 
0.006180475000292063, -0.02844754233956337, 0.11084792017936707, -0.03348945826292038, 0.06860767304897308, -0.0214154664427042, -0.0008655296987853944, -0.020698973909020424, 0.03369581326842308, 0.019848104566335678, 0.013533092103898525, 0.03423681482672691, 0.014547858387231827, 0.02418140508234501, -0.013769546523690224, -0.09633788466453552, 0.01689709909260273, -0.01452709175646305, 0.047873757779598236, -0.0012036423431709409, 0.03720762953162193], @@ -145,7 +135,6 @@ "RET":[-0.09685279428958893, 0.0101965656504035, -0.04206235706806183, -0.05282443389296532, 0.050776951014995575, -0.006812752690166235, 0.09618920832872391, 0.04637071117758751, -0.018928129225969315, -0.04118828848004341, -0.06039129197597504, -0.018619466572999954, -0.07845143973827362, -0.14034120738506317, -0.03397035226225853, -0.028233898803591728, -0.08162513375282288, 0.048710327595472336, -0.04177732393145561, -0.08455172181129456, 0.00312337395735085, 0.03531079366803169, -0.057201240211725235, 0.09391707926988602, -0.02847883477807045, 0.01840023323893547, -0.04936904460191727, 0.027487540617585182, 0.08041024953126907, -0.08714525401592255, 0.11963017284870148, -0.0762581005692482, -0.06482874602079391, 0.038007382303476334, -0.003661463735625148, 0.0064629544503986835, -0.08281382918357849, -0.053177930414676666, 0.01966426707804203, -0.04822755232453346, -0.0474051795899868, 0.026990806683897972, -0.057971399277448654, 0.12347304075956345, -0.02745792828500271, 0.0832793116569519, 0.03029884397983551, -0.032751865684986115, -0.022912420332431793, -0.030569355934858322, 0.0971289873123169, 0.07298070192337036, 0.0306894201785326, 0.05817654728889465, 0.005174126010388136, 0.042281877249479294, 0.01975836046040058, -0.11205509305000305, 0.05081645026803017, 0.0034761943388730288, -0.03858469799160957, 0.007316718343645334, 0.07441510260105133, 0.004579664673656225, -0.021868426352739334, 0.01116174180060625, 0.061042461544275284, 0.029598504304885864, -0.06691239774227142, 
0.03223221376538277, 0.0867755264043808, 0.05488765984773636, -0.019738517701625824, -0.030367519706487656, -0.06396497040987015, -0.0022451707627624273, -0.06131305173039436, -0.03129804506897926, -0.05657076835632324, 0.009733426384627819, -0.08145039528608322, -0.09049411863088608, 0.004821183159947395, 0.038612931966781616, -0.019062234088778496, -0.021097682416439056, -0.06061801686882973, 0.019766775891184807, 0.0276743546128273, -0.057942990213632584, -0.033430278301239014, 0.0043391571380198, 0.05848158895969391, 0.0826464518904686, 0.09988056123256683, -0.05677378550171852, -0.11326800286769867, 0.051275406032800674, 0.01158174965530634, 0.04368240013718605], "ROL":[0.026423713192343712, 0.08523924648761749, 0.005345864687114954, 0.027778491377830505, 0.06572498381137848, 0.056946828961372375, -0.03009108640253544, -0.05564097315073013, 0.07753216475248337, -0.07402804493904114, -0.05589171126484871, -0.050976503640413284, 0.041095346212387085, -0.06708681583404541, 0.08517566323280334, 0.02110634744167328, -0.027871981263160706, 0.0005450723110698164, 0.07511565834283829, 0.0016275837551802397, 0.04902505874633789, 0.024746844545006752, 0.08780711144208908, -0.06167766824364662, 0.06365402787923813, 0.06462119519710541, -0.04920244216918945, 0.056112516671419144, 0.10561680048704147, 0.07879003882408142, 0.03879575803875923, -0.03582729026675224, 0.004805437754839659, 0.030719229951500893, 0.0558336041867733, 0.04387545958161354, 0.020841658115386963, 0.015068157576024532, -0.008266274817287922, 0.05914990231394768, -0.01581275276839733, 0.060716625303030014, -0.02257946878671646, 0.00995479617267847, 0.002104438142850995, 0.03806104138493538, 0.010437156073749065, 0.039603881537914276, -0.02074524573981762, 0.024094516411423683, 0.031944990158081055, -0.07122939079999924, -0.023190783336758614, 0.006518832873553038, -0.04528677463531494, -0.02354210615158081, -0.03518632799386978, -0.07059651613235474, -0.017474880442023277, 0.06688393652439117, 
-0.07900173962116241, -0.05843310430645943, 0.05351021885871887, -0.05724814161658287, -0.02697751857340336, -0.031128596514463425, 0.03040527179837227, -0.009157841093838215, -0.07642515003681183, -0.042137425392866135, -0.031383614987134933, -0.07586777210235596, 0.0489036925137043, 0.0657171905040741, 0.027123138308525085, 0.034842655062675476, -0.035231154412031174, 0.009778738021850586, -0.06150955334305763, 0.042132262140512466, 0.08945925533771515, -0.07213590294122696, -0.0518047958612442, -0.07094760239124298, 0.07041053473949432, 0.1046413779258728, 0.02394813485443592, -0.014966128394007683, -0.04967860132455826, -0.03941388055682182, -0.10642798990011215, -0.03915626183152199, -0.10921923071146011, -0.035421375185251236, 0.039855729788541794, 0.04145469889044762, -0.025123557075858116, 0.06743432581424713, -0.02060243859887123, 0.02994687482714653], "ROR":[-0.03797177970409393, -0.03406170755624771, -0.014866529032588005, -0.002243943279609084, 0.024476991966366768, -0.08789698034524918, 0.02924288995563984, -0.03145875036716461, -0.0030907171312719584, 0.013303312472999096, 0.05823688209056854, 0.06085257977247238, 0.0682583823800087, 0.06680850684642792, -0.0008473473135381937, 0.056926507502794266, 0.05309343710541725, 0.017690004780888557, -0.028605103492736816, 0.02303914539515972, -0.07054196298122406, -0.011117611080408096, -0.0012138717574998736, -0.0877937376499176, -0.005339651368558407, -0.029197875410318375, -0.06283852458000183, 0.00677055399864912, 0.07529082894325256, -0.005144342314451933, -0.03930655121803284, -0.0469868965446949, 0.06799482554197311, -0.013870766386389732, -0.07353825122117996, -0.10425472259521484, 8.023920236155391e-05, 0.05196760594844818, -0.024758316576480865, -0.03249195218086243, -0.0037688545417040586, -0.0033505349420011044, 0.04382188990712166, 0.035679250955581665, -0.04743441194295883, 0.031142324209213257, -0.04255860671401024, -0.02310662344098091, -0.04199622571468353, -0.034439221024513245, 
-0.06397263705730438, -0.011049525812268257, -0.055776823312044144, 0.039233505725860596, 0.016644736751914024, -0.08737850934267044, 0.0151174021884799, 0.10728199779987335, -0.0006503594922833145, -0.060365013778209686, -0.05337308719754219, -0.021152105182409286, 0.06532585620880127, -0.00926337018609047, -0.08149554580450058, 0.0485830195248127, 0.034749776124954224, -0.05045035108923912, -0.06366241723299026, 0.0544571727514267, 0.07594002038240433, 0.027496861293911934, -0.047294747084379196, 0.017491186037659645, -0.034639474004507065, 0.006060798652470112, -0.07335491478443146, -0.054728057235479355, -0.0018357941880822182, -0.07110298424959183, 0.09072742611169815, 0.03083305060863495, 0.054598040878772736, -0.028097454458475113, -0.012821618467569351, 0.008708478882908821, -0.06561881303787231, -0.04448843002319336, 0.08860815316438675, -0.050312310457229614, 0.09012935310602188, -0.004711236339062452, -0.020932462066411972, -0.10615857690572739, -0.005630030296742916, 0.03976801037788391, 0.040199730545282364, 0.07235082983970642, -7.448523683706298e-05, 0.076942078769207], - "RegisterMask":[0.009287647902965546, 0.029691029340028763, -0.03465871885418892, 0.032606374472379684, -0.007339544594287872, 0.03367740660905838, -0.0661492720246315, 0.0436118021607399, -0.002896533813327551, 0.028440887108445168, -0.06791415065526962, 0.004055356606841087, -0.01596181094646454, -0.003846745239570737, 0.06762582808732986, -0.025632556527853012, 0.08132420480251312, 0.025554664433002472, -0.08994632959365845, 0.02521730400621891, 0.023826507851481438, 0.0004487193073146045, 0.01047397032380104, 0.03246957063674927, -0.033482909202575684, 0.05051224306225777, 0.005778896156698465, -0.0006257061613723636, 0.00522293895483017, -0.04666636884212494, 0.022335125133395195, -0.022150320932269096, 0.04510439187288284, -0.02769547514617443, 0.026804683730006218, 0.0710473507642746, -0.014513042755424976, 0.0695318952202797, 0.048469461500644684, -0.008654370903968811, 
-0.028613079339265823, -0.02918054349720478, -0.022721733897924423, -0.0004791628452949226, 0.011470172554254532, 0.08561886101961136, 0.07125027477741241, -0.05847848951816559, 0.011811288073658943, -0.025244031101465225, -0.03665035218000412, -0.03482883796095848, 0.04196881502866745, 0.06909161061048508, 0.02365143597126007, -0.0689089447259903, -0.0707414448261261, -0.03962424397468567, -0.025703679770231247, 0.06502455472946167, 0.057676125317811966, 0.026916807517409325, 0.024921152740716934, 0.009799988009035587, -0.018656229600310326, 0.009880480356514454, -0.06516153365373611, 0.019290866330266, 0.02236226759850979, -0.02598695270717144, -0.00299705658107996, 0.019448822364211082, -0.014883329160511494, 0.06645222008228302, -0.028751512989401817, -0.01589173451066017, 0.026225939393043518, 0.07285763323307037, -0.06037987396121025, -0.027615630999207497, -0.039930179715156555, -0.07122864574193954, 0.029825787991285324, 0.026364129036664963, -0.04438399150967598, 0.07015394419431686, -0.013950555585324764, 0.004367176443338394, 0.020521124824881554, 0.02030497044324875, 0.011951270513236523, 0.06765977293252945, -0.015042259357869625, 0.005189584568142891, -0.07532864063978195, -0.010886142030358315, 0.006792030762881041, -0.06348442286252975, 0.031859394162893295, -0.052482619881629944], "SAR":[-0.058561697602272034, -0.014889497309923172, -0.009758144617080688, 0.00019282882567495108, -0.040600407868623734, -0.05907759070396423, 0.033052023500204086, -0.04672614857554436, -0.050173744559288025, -0.06619776040315628, 0.005385559983551502, 0.05449973791837692, -0.0035163976717740297, -0.12835650146007538, 0.06576846539974213, 0.030572880059480667, -0.014856431633234024, 0.011252024210989475, 0.018954169005155563, -0.10070347040891647, -0.032273050397634506, 0.007221086882054806, -0.020879192277789116, 0.0691007450222969, 0.01286559458822012, -0.020694725215435028, -0.07545264810323715, -0.07742343097925186, -0.005103116389364004, 0.10223732143640518, 
-0.08521754294633865, 0.07459715753793716, 0.006563629489392042, -0.059839747846126556, -0.023294325917959213, 0.04265525937080383, -0.011012998409569263, -0.02257128618657589, -0.033783379942178726, 0.0368407666683197, -0.048024341464042664, -0.037417128682136536, 0.09010431170463562, 0.09016482532024384, -0.07939734309911728, 0.03274676203727722, 0.0388714037835598, -0.03253694251179695, 0.020820122212171555, -0.0039061333518475294, 0.025425976142287254, -0.01847209222614765, 0.013026821427047253, 0.08873090147972107, -0.010358930565416813, -0.026935681700706482, 0.04795868322253227, -0.06173045188188553, -0.02299962192773819, -0.09966729581356049, 0.008027775213122368, 0.03202224150300026, -0.08922284096479416, 0.03263246268033981, 0.0702379047870636, 0.08681228011846542, -0.053993936628103256, 0.0009890834335237741, -0.060423459857702255, 0.08636976033449173, 0.04784319922327995, 0.05135124549269676, -0.023515762761235237, 0.015414481982588768, -0.06941155344247818, 0.004289102740585804, -0.10909571498632431, 0.014149827882647514, -0.025285568088293076, 0.06270574778318405, 0.0669349953532219, 0.03599094599485397, 0.0436582937836647, 0.06281902641057968, -0.04479018226265907, -0.04126136004924774, -0.026938045397400856, -0.0349077507853508, 0.002964549232274294, -0.04247729107737541, 0.009402072057127953, 0.10574454814195633, 0.03262042999267578, 0.08030910044908524, -0.031244831159710884, 0.010621835477650166, -0.02628093585371971, 0.046942535787820816, -0.022998474538326263, 0.009223603643476963], "SBB":[-0.040700677782297134, -0.01474229246377945, 0.09491399675607681, 0.015464535914361477, -0.05408482998609543, -0.09618491679430008, -0.014700816012918949, -0.06255258619785309, 0.09308589994907379, 0.01991264335811138, 0.04899228736758232, -0.03322140499949455, -0.03979090601205826, -0.161369189620018, 0.0957769826054573, -0.045866891741752625, -0.03776619955897331, 0.09559016674757004, -0.0063005415722727776, 0.07086999714374542, -0.004713557660579681, 
0.10066409409046173, -0.053719762712717056, 0.07039386034011841, 0.01788068749010563, 0.01069885678589344, -0.003849055850878358, 0.07810717821121216, 0.10748977214097977, -0.09462521225214005, -0.06140149384737015, -0.028434589505195618, 0.0395897701382637, 0.05396975204348564, 0.009982907213270664, -0.014297235757112503, 0.018435295671224594, -0.04264533147215843, -0.0471954308450222, -0.008587008342146873, 0.010918513871729374, -0.03147284686565399, 0.08885594457387924, 0.05178891867399216, 0.05807363614439964, 0.028190992772579193, 0.04205470532178879, 0.00935433991253376, 0.027427801862359047, -0.02180725708603859, -0.06614664196968079, 0.021269382908940315, 0.0585390068590641, 0.12827278673648834, 0.0420454666018486, 0.06753493845462799, -0.05479112267494202, -0.06480395793914795, 0.02621031180024147, -0.07586188614368439, -0.04831313341856003, 0.016674980521202087, -0.006851759273558855, 0.04103298485279083, 0.005965645890682936, -0.02317493036389351, -0.03966135531663895, -0.02576862834393978, -0.0916895642876625, 0.029451601207256317, 0.044677067548036575, 0.026928072795271873, -0.10388721525669098, 0.021140936762094498, -0.06990157812833786, 0.048356350511312485, -0.08890967816114426, -0.0003503488842397928, -0.10245566070079803, -0.0582563653588295, 0.04677841439843178, 0.04697449132800102, -0.04022470489144325, 0.02759086713194847, -0.02867579087615013, 0.013355317525565624, 0.011504339054226875, -0.04230086877942085, -0.045500747859478, -0.03741880878806114, 0.022458063438534737, 0.05192841589450836, 0.008104681968688965, -0.08284809440374374, 0.059996478259563446, 0.07762005180120468, -0.0031316280364990234, 0.06990513950586319, -0.020328091457486153, -0.0027387691661715508], "SETB_C":[-0.007473401725292206, -0.06315194815397263, 0.0693482831120491, 0.05207814276218414, -0.08006429672241211, -0.005448522046208382, -0.007457572966814041, 0.011581258848309517, -0.05411145091056824, -0.06738752871751785, 0.013233165256679058, -0.0677611380815506, 
0.01846255734562874, -0.09321920573711395, -0.03116961196064949, 0.05861300230026245, -0.001519175828434527, 0.08354826271533966, 0.023905213922262192, 0.0124649154022336, 0.08983863890171051, 0.055941760540008545, 0.07229111343622208, 0.09052376449108124, -0.013718990609049797, 0.06642850488424301, 0.0822976604104042, 0.010060268454253674, 0.04116540774703026, -0.03301406651735306, 0.07296404242515564, -0.03534134477376938, 0.012426529079675674, -0.005412430968135595, 0.06087784096598625, 0.03547677770256996, -0.007232111878693104, 0.06580550968647003, -0.0037480974569916725, 0.02971699647605419, -0.06937503069639206, 0.08572175353765488, -0.02138090692460537, -0.0053040217608213425, -0.029469722881913185, -0.05332958698272705, 0.10073655843734741, -0.03199373558163643, -0.01775289885699749, -0.09716105461120605, 0.06483447551727295, 0.028643250465393066, -0.029914388433098793, 0.007070464547723532, 0.006640028208494186, -0.0033612342085689306, 0.005682659335434437, 0.011877131648361683, -0.038144148886203766, 0.03381858021020889, 0.02083616890013218, 0.029199717566370964, 0.07813020050525665, -0.006173993926495314, -0.016444502398371696, -0.08474857360124588, 0.03877300024032593, -0.046462398022413254, 0.02460806630551815, 0.053950369358062744, 0.01389766950160265, 0.03323421627283096, 0.04349416866898537, 0.04381947219371796, 0.10320119559764862, -0.1117740124464035, 0.03045269101858139, -0.03870442137122154, -0.07607249915599823, -0.00020808610133826733, -0.09519094228744507, 0.06727365404367447, -0.04469249024987221, 0.07144048810005188, -0.08811240643262863, 0.001203814405016601, 0.06901863217353821, 0.05462682247161865, -0.03902207687497139, -0.05885632708668709, -0.028275305405259132, -0.07151838392019272, -0.059166230261325836, -0.015570566058158875, 0.06314826756715775, -0.040293656289577484, 0.021595094352960587, -0.04083842411637306, -0.09180022031068802, -0.0309903621673584], @@ -263,26 +252,6 @@ "VINSERTI":[0.09046315401792526, 0.015515458770096302, 
0.04200809448957443, -0.046130646020174026, -0.045843131840229034, 0.003743539797142148, 0.025380345061421394, -0.021319231018424034, 0.03850293532013893, 0.006397924851626158, -0.06982530653476715, -0.016159888356924057, -0.09164588898420334, 0.1245846077799797, -0.05104857683181763, -0.011446121148765087, -0.06936608999967575, -0.02683587372303009, -0.0337526798248291, -0.005495472811162472, 0.023584537208080292, -0.0771733894944191, -0.026287894695997238, -0.01172191184014082, 0.09737047553062439, 0.0375351756811142, 0.03280220180749893, -0.014072075486183167, 0.06032971292734146, 0.0072259255684912205, -0.08368974179029465, 0.054626062512397766, -0.021156134083867073, -0.09647785127162933, 0.07431179285049438, -0.09039300680160522, -0.07652204483747482, -0.002478789770975709, -0.012967151589691639, 0.08174770325422287, -0.00968913547694683, 0.015551612712442875, 0.08655177801847458, -0.056927114725112915, -0.011370684020221233, -0.0408773347735405, 0.04413295164704323, 0.05919815972447395, 0.08101782202720642, -0.008914918638765812, -0.019233090803027153, 0.05211508646607399, -0.010292282328009605, 0.021839600056409836, 0.0016950241988524795, -0.031931016594171524, 0.004831018857657909, 0.015328328125178814, -0.015326892025768757, -0.05457184836268425, 0.03782501816749573, -0.014512602239847183, 0.00869232788681984, -0.04001179710030556, -0.00994281005114317, -0.041689563542604446, 0.060574647039175034, 0.044912341982126236, 0.05958174169063568, -0.035378437489271164, 0.08524063974618912, 0.012326095253229141, -0.052227456122636795, -0.015090923756361008, -0.012893415056169033, -0.019565775990486145, -0.03284028172492981, -0.02651887759566307, 0.02436136268079281, 0.004743371158838272, 0.019924448803067207, -0.046163417398929596, 0.005615816451609135, -0.03354670852422714, 0.00801338255405426, 0.02501787059009075, 0.03313247114419937, -0.012842012569308281, 0.04856807366013527, -0.031942710280418396, -0.026277944445610046, 0.11483185738325119, 
0.015686793252825737, 0.052031729370355606, 0.025188622996211052, -0.021448785439133644, 0.05062439665198326, -0.030834710225462914, 0.02746596559882164, 0.027027780190110207], "VINSERTPSrm":[0.010265239514410496, 0.03508870303630829, 0.0182888712733984, 0.01066108699887991, 0.09608251601457596, -0.0390457920730114, -0.02508910745382309, -0.051061324775218964, 0.051924970000982285, 0.02405509166419506, 0.07347054034471512, -0.023432396352291107, 0.03053455613553524, 0.11051331460475922, -0.07987380027770996, -0.07169938087463379, -0.06944284588098526, -0.010227087885141373, 0.01555782649666071, 0.0033066831529140472, -0.017572278156876564, 0.018880389630794525, -0.03347453102469444, 0.023936258628964424, 0.04189354181289673, -0.008910899050533772, 0.045309878885746, 0.039228133857250214, -0.0026367944665253162, 0.01713910512626171, -0.0038225038442760706, -0.02550170198082924, -0.04479162022471428, -0.11607012152671814, -0.05566885322332382, -0.03926549106836319, -0.05618799477815628, 0.0587196871638298, -0.003744689514860511, 0.09148088097572327, -0.008691483177244663, 0.060393813997507095, 0.05017181858420372, 0.05314680561423302, 0.010222898796200752, 0.04390108212828636, 0.06256565451622009, -0.039335936307907104, -0.030927496030926704, -0.010439696721732616, -0.09615844488143921, 0.04857023432850838, -0.021018074825406075, -0.04949686676263809, 0.0718517154455185, -0.0008082279819063842, -0.05119258537888527, -0.016725104302167892, -0.031902290880680084, 0.07473913580179214, -0.07040376961231232, -0.06263279169797897, -0.01866966485977173, -0.04580819234251976, -0.0018242622027173638, 0.02124813199043274, 0.01608111523091793, 0.033293697983026505, 0.04724595695734024, 0.06764823198318481, -0.010222701355814934, -0.09166357666254044, 0.0065320758149027824, 0.03907076641917229, 0.014404546469449997, -0.04371245950460434, -0.036747194826602936, -0.013570152223110199, -0.04874530807137489, -0.048001520335674286, 0.015114152804017067, 0.018710903823375702, 
0.06920618563890457, -0.04024452716112137, 0.07851467281579971, -0.03879975154995918, 0.039278119802474976, -0.06678346544504166, -0.02139596827328205, 0.03483150899410248, 0.047795433551073074, -0.08071790635585785, 0.05023014172911644, -0.05863310769200325, 0.04391729459166527, -0.006796867586672306, -0.07156652212142944, -0.016402525827288628, 0.07568787783384323, 0.01640038751065731], "VINSERTPSrr":[0.022902479395270348, 0.024018414318561554, 0.011011038906872272, 0.009072761051356792, -0.057881347835063934, 0.03739643841981888, 0.006768766324967146, 0.07734010368585587, 0.001509909750893712, 0.043335434049367905, -0.07244917005300522, -0.1078081876039505, 0.027186766266822815, 0.018834171816706657, 0.007436624728143215, -0.048498328775167465, 0.09450934827327728, -0.015420452691614628, 0.014672537334263325, -0.0012827727477997541, 0.019664635881781578, -0.026565955951809883, -0.04819858446717262, -0.0004387270309962332, 0.01676507107913494, -0.0014571163337677717, 0.015105879865586758, 0.04038102179765701, 0.008408628404140472, 0.07757255434989929, -0.09923559427261353, -0.04181523248553276, -0.0313955582678318, -0.006045420188456774, 0.05904707312583923, -0.014993838034570217, 0.03219055384397507, 0.058543696999549866, -0.06872320920228958, 0.021718619391322136, -0.08984571695327759, 0.06557019799947739, -0.018167613074183464, -0.011413732543587685, 0.036035921424627304, 0.10104569792747498, 0.05836406350135803, -0.02576756477355957, 0.03827998414635658, -0.06874323636293411, 0.01668366976082325, 0.048310816287994385, -0.010213772766292095, -0.035550933331251144, -0.03385040909051895, 0.004614332225173712, 0.018951643258333206, 0.10679180175065994, -0.019135646522045135, -0.011955377645790577, 0.028140606358647346, -0.08185642957687378, -0.015775075182318687, -0.011507326737046242, 0.07914295047521591, -0.030148068442940712, 0.11757981777191162, 0.00040086795343086123, 0.056880321353673935, -0.014461426064372063, -0.0008378245402127504, -0.06245473772287369, 
-0.05332277715206146, -0.038401950150728226, 0.005011103581637144, 0.0368003323674202, -0.021230563521385193, 0.01497745979577303, 0.0372738242149353, 0.07988940924406052, -0.013381360098719597, 0.0036820468958467245, 0.07501927763223648, -0.0996084213256836, 0.028014982119202614, -0.09410325437784195, 0.0007335525006055832, 0.004884959198534489, 0.040397197008132935, -0.07655651122331619, 0.05677357316017151, 0.005359896458685398, -0.047478366643190384, 0.03828851506114006, 0.03363208472728729, -0.041756175458431244, -0.00031817640410736203, 0.022837577387690544, 0.039567966014146805, 0.03662540763616562], - "VIRT_REG_FR32":[0.0034248235169798136, -0.011980761773884296, -0.0501178540289402, 0.0494888611137867, 0.06103336811065674, -0.06178610771894455, 0.007709897588938475, -0.011392943561077118, 0.06570645421743393, 0.0771368145942688, 0.0005577280535362661, 0.013396150432527065, -0.041660163551568985, 0.05122360959649086, 0.11354377865791321, -0.009875510819256306, -0.06466709822416306, 0.048170577734708786, 0.0007201629341579974, 0.06538223475217819, 0.08870227634906769, -0.05771782249212265, 0.009273379109799862, -0.03325295075774193, 0.01197165809571743, 0.06604835391044617, 0.08265330642461777, -0.005758166313171387, 0.02512396313250065, 0.03383670747280121, 0.038484204560518265, -0.06539343297481537, -0.013461028225719929, 0.001498897559940815, 0.05170154944062233, 0.06965786963701248, -0.07339458167552948, 0.05094756931066513, 0.01983451284468174, -0.06855696439743042, 0.07892709225416183, 0.06099703162908554, 0.08492864668369293, 0.05357863008975983, -0.009294840507209301, -0.0054923719726502895, -0.029938997700810432, 0.028260599821805954, 0.053790509700775146, -0.06574371457099915, -0.009621666744351387, -0.08131514489650726, -0.08474338054656982, 0.039622966200113297, 0.06945627927780151, 0.02545306645333767, 0.005390701815485954, 0.04582791030406952, -0.1103447750210762, -0.050917647778987885, 0.03087870217859745, 0.06918162852525711, 
0.0548822283744812, -0.01838473603129387, 0.05597897991538048, 0.03548860549926758, -0.009931124746799469, -0.07856663316488266, 0.033994875848293304, 0.03467561677098274, 0.09580692648887634, -0.04153195023536682, -0.06732118874788284, -0.06857144832611084, 0.03419093042612076, -0.01200241968035698, -0.06983492523431778, 0.05929506942629814, -0.00041734304977580905, -0.026396293193101883, 0.05230500176548958, -0.006162640172988176, 0.044198282063007355, -0.028765834867954254, 0.031155114993453026, 0.06967037916183472, -0.0892564132809639, 0.028816571459174156, -0.037065472453832626, 0.06540130823850632, -0.01888667233288288, 0.030632384121418, 0.0359313078224659, 0.106044240295887, 0.03259910270571709, -0.0775517001748085, -0.04267778620123863, 0.04977935180068016, -0.01790289767086506, -0.11223265528678894], - "VIRT_REG_FR64":[0.08496882021427155, 0.049308884888887405, -0.016840212047100067, 0.010602951049804688, -4.6025739720789716e-05, -0.06524767726659775, 0.048670798540115356, -0.06444543600082397, -0.0031944462098181248, 0.05608433857560158, -0.03958145156502724, 0.05171080678701401, -0.03572545200586319, -0.054364755749702454, 0.052311528474092484, -0.0361458919942379, 0.024109655991196632, 0.15923210978507996, -0.07255382835865021, -0.011799084022641182, -0.06846465915441513, 0.0023571476340293884, 0.02642918936908245, -0.05057685822248459, 0.029800178483128548, -0.06036723777651787, -0.012272411957383156, -0.022802220657467842, -0.02426644042134285, 0.05623406544327736, -0.07506053894758224, -0.02078152634203434, 0.02549685165286064, -0.030025657266378403, -0.0627482682466507, 0.062375299632549286, 0.03684084117412567, 0.06365678459405899, 0.0004415051080286503, -0.002180535811930895, 0.05225013941526413, -0.0693102702498436, -0.03649357333779335, 0.005159272346645594, -0.03298519179224968, 0.041419681161642075, -0.05325934663414955, -0.017585784196853638, -0.03843431547284126, -0.002649943344295025, 0.033329058438539505, -0.04736043140292168, 
-0.043852102011442184, -0.06713785231113434, -0.03237355872988701, 0.012679073959589005, -0.01959240809082985, 0.07324203103780746, 0.07468831539154053, 0.03327644243836403, -0.01596391387283802, 0.12015434354543686, 0.051839299499988556, 0.00980563648045063, -0.08275608718395233, 0.04445798322558403, -0.03891860321164131, 0.10891054570674896, -0.008730625733733177, -0.051655255258083344, -0.05982912331819534, 0.04106972739100456, 0.06872759014368057, 0.013289053924381733, 0.03469584137201309, -0.06673429906368256, -0.0695682018995285, 0.047426726669073105, 0.02815094031393528, -0.05552271753549576, 0.0010567272547632456, -0.051840681582689285, -0.01704293303191662, -0.047185055911540985, 0.036965738981962204, 0.03452568128705025, -0.05430837720632553, 0.0383443646132946, 0.0003438846324570477, -0.030417989939451218, 0.02749026007950306, -0.0546082966029644, 0.03005768544971943, 0.0025131346192210913, 0.0013019279576838017, -0.054173994809389114, -0.008382225409150124, 0.02153395675122738, 0.011912085115909576, -0.10461334884166718], - "VIRT_REG_GR16":[0.09543223679065704, 0.03513967618346214, 0.08986528217792511, -0.012217407114803791, -0.02076001651585102, -0.04190119728446007, 0.01318269595503807, -0.010142332874238491, -0.011869532987475395, -0.040446147322654724, 0.06552371382713318, 0.04439055174589157, 0.08176156878471375, -0.06334159523248672, -0.033928077667951584, -0.00024628525716252625, 0.0244551170617342, -0.019419007003307343, -0.09592454880475998, 0.005961012560874224, 0.03278326243162155, -0.07028506696224213, -0.08484592288732529, -6.329250754788518e-05, 0.015018146485090256, -0.05068608745932579, 0.0732998326420784, 0.023434389382600784, 0.0002124009479302913, 0.060401707887649536, 0.013626078143715858, -0.010556582361459732, -0.005069760140031576, -0.004616749472916126, -0.034329116344451904, 0.060584329068660736, -0.05430089309811592, -0.029179023578763008, 0.042385730892419815, -0.0652197003364563, 0.09378205984830856, -0.05090794339776039, 
-0.008510591462254524, 0.0837036669254303, 0.009071480482816696, 0.04464874789118767, -0.012855015695095062, 0.06306030601263046, -0.08556588739156723, -0.05393703281879425, -0.06741822510957718, -0.03717748448252678, 0.017156923189759254, 0.07401604950428009, -0.06629005819559097, -0.04564857482910156, -0.055414989590644836, 0.039407771080732346, -0.04089723527431488, 0.06915309280157089, 0.030190052464604378, 0.027542876079678535, 0.03557966649532318, 0.05191207677125931, -0.03237364813685417, -0.02036256715655327, -0.071859210729599, -0.06704329699277878, 0.0336633175611496, 0.09511569887399673, 0.0048662531189620495, 0.05273270234465599, -0.056247059255838394, 0.06079721450805664, -0.04150049015879631, -0.08104457706212997, -0.10303051024675369, 0.04522428661584854, -0.04379847273230553, -0.019447194412350655, 0.0021319733932614326, -0.010465282015502453, 0.06857019662857056, -0.00443653529509902, -0.08039603382349014, -0.05012141168117523, 0.0875077098608017, -0.03053239732980728, -0.05321606993675232, 0.016501901671290398, -0.0563507042825222, -0.03187479078769684, -0.0015389680629596114, 0.022985411807894707, -0.05008963868021965, 0.028300117701292038, 0.02875804342329502, -0.024458128958940506, -0.022238614037632942, -0.049835607409477234], - "VIRT_REG_GR32":[-0.008479167707264423, -0.02941126376390457, 0.05343153327703476, 0.03769504278898239, -0.0006716987118124962, -0.0329299233853817, 0.03442851081490517, -0.06826753169298172, -0.09117511659860611, -0.018657755106687546, 0.029032904654741287, 0.02404048666357994, 0.010598761960864067, -0.0482308566570282, 0.06956348568201065, -0.027967501431703568, -0.07380961626768112, -0.021098148077726364, -0.0808446854352951, 0.0127912862226367, -0.01355082169175148, -0.040285225957632065, 0.035385165363550186, -0.001157263875938952, -0.026462145149707794, -0.08616211265325546, -0.044482193887233734, -0.010969695635139942, 0.04645564407110214, -0.018178211525082588, -0.038536932319402695, -0.027571648359298706, 
-0.007523007690906525, -0.02699458785355091, -0.039170436561107635, 0.12889482080936432, -0.04512789845466614, -0.03883056715130806, 0.051210880279541016, 0.03924906626343727, 0.036943964660167694, -0.016879307106137276, 0.011263007298111916, 0.053573690354824066, -0.018964825198054314, -0.041856080293655396, -0.036545924842357635, 0.07715532928705215, -0.041981130838394165, -0.04114629328250885, -0.04393022507429123, -0.030163627117872238, 0.0019487979589030147, 0.10988762229681015, 0.09039165079593658, -0.0035424421075731516, -0.06272851675748825, 0.007701062131673098, -0.01971622183918953, 0.06203003600239754, 0.048561323434114456, -0.04599940404295921, 0.00802221056073904, -0.002905400237068534, -0.1050020381808281, 0.003395768813788891, -0.07973644882440567, 0.008020970039069653, -0.08614815771579742, 0.0518532320857048, 0.021174483001232147, 0.03254232555627823, -0.01905026100575924, -0.0009989180834963918, -0.06409642845392227, -0.022425753995776176, -0.03563409671187401, 0.07717793434858322, -0.04553033784031868, -0.02112392708659172, -0.002374667674303055, 0.03828585892915726, -0.014221777208149433, -0.015974245965480804, -0.01805220916867256, 0.04202109947800636, -0.0841534212231636, 0.06608130037784576, -0.11586519330739975, 0.024179989472031593, 0.017091574147343636, 0.08567194640636444, -0.03692129999399185, 0.03266705200076103, -0.046154942363500595, 0.0040525165386497974, -0.03177625685930252, 0.039895471185445786, 0.042960215359926224, -0.05573953315615654], - "VIRT_REG_GR32_ABCD":[0.016604775562882423, -0.0028934956062585115, 0.041060179471969604, -0.025077441707253456, -0.018642406910657883, 0.023762650787830353, -0.028646549209952354, -0.02460283786058426, 0.005985732190310955, 0.01774146780371666, -0.004014404024928808, -0.05473850294947624, -0.0417158380150795, -0.06322457641363144, 0.060795728117227554, -0.036435071378946304, -0.04245952516794205, 0.08069344609975815, 0.035319335758686066, -0.012020719237625599, 0.045771341770887375, 
-0.10842540860176086, 0.046253710985183716, -0.004099135287106037, 0.030616935342550278, -0.08288344740867615, 0.08569363504648209, -0.014164377935230732, -0.004303323570638895, 0.09726760536432266, 0.06208871304988861, -0.04007713496685028, 0.005815347656607628, 0.02377200312912464, 0.07813961058855057, 0.03192306309938431, -0.006230524741113186, 0.10110925883054733, -0.023409254848957062, 0.030774405226111412, -0.011607645079493523, -0.03929119184613228, 0.004817614797502756, -0.013827506452798843, 0.07770339399576187, -0.07994075864553452, -0.03157062083482742, 0.06743781268596649, 0.014881699346005917, -0.030165214091539383, -0.07844353467226028, -0.04563238099217415, 0.09747181832790375, 0.057128582149744034, 0.04173563793301582, -0.0011194447288289666, -0.01902887038886547, -0.032171595841646194, 0.04824799671769142, 0.008433254435658455, 0.024706291034817696, 0.0746094286441803, 0.04515853151679039, -0.0018984260968863964, -0.10070884972810745, -0.01883143000304699, -0.07785795629024506, 0.10938235372304916, -0.08001448959112167, -0.07419873028993607, 0.010544849559664726, 0.025767439976334572, -0.1005895584821701, 0.05103800818324089, -0.03675306960940361, -0.020510872825980186, 0.022482097148895264, 0.06463642418384552, -0.03149804100394249, -0.021647030487656593, 0.04025804623961449, 0.003628256032243371, 0.03532547131180763, -0.08667688816785812, 0.018817460164427757, -0.01690257526934147, -0.10114696621894836, -0.022815177217125893, 0.024386661127209663, 0.10286301374435425, 0.030005114153027534, 0.0370776504278183, -0.008584428578615189, -0.077603779733181, -0.03588058054447174, 0.030617419630289078, -0.07383710891008377, 0.03215676173567772, 0.03288266062736511, -0.036702848970890045], - "VIRT_REG_GR32_NOREX":[0.019052108749747276, -0.006784944795072079, -0.05410394072532654, 0.001966317882761359, -0.06686867773532867, 0.013514372520148754, 0.030097918584942818, -0.03868359327316284, 0.004314934369176626, -0.06713679432868958, 0.02491898462176323, 
0.027683967724442482, 0.035907283425331116, -0.023093875497579575, -0.0892200842499733, -0.1052003800868988, -0.03923499956727028, 0.08808581531047821, -0.10092058777809143, 0.03336786851286888, -0.08974049985408783, -0.015254802070558071, 0.039686985313892365, -0.010083628818392754, -0.03423550724983215, -0.08821681141853333, -0.05621311068534851, -0.020327769219875336, -0.016793876886367798, 0.08908043801784515, -0.04112761467695236, -0.050139520317316055, -0.01524045504629612, 0.05841142684221268, 0.08270087838172913, 0.0348736047744751, -0.016146546229720116, 0.05751227214932442, 0.05081859603524208, -0.07304663956165314, -0.047101784497499466, -0.02825125865638256, 0.0006340605323202908, 0.0008785317186266184, -0.044239338487386703, 0.007173972204327583, -0.029449066147208214, 0.07254412025213242, -0.026029080152511597, 0.025982191786170006, -0.09524690359830856, -0.052613094449043274, -0.1270490437746048, 0.05319184809923172, 0.1046818196773529, 0.0477570965886116, -0.06291303783655167, 0.04725426062941551, -0.05330964922904968, 0.04056742787361145, 0.01543382927775383, 0.03627128154039383, -0.048232536762952805, 0.014761016704142094, -0.007380587514489889, -0.008060632273554802, -0.021923277527093887, -0.022500980645418167, -0.08495079725980759, 0.045358967036008835, -0.04728720709681511, 0.03550735488533974, 0.03445536270737648, -0.01891610585153103, -0.09439470618963242, -0.044266197830438614, -0.07952893525362015, 0.05221104994416237, -0.03507477045059204, 0.04218391329050064, 0.040326621383428574, -0.0395088866353035, 0.02447870559990406, -0.04280063137412071, 0.06520935893058777, -0.003358252113685012, -0.057561881840229034, 0.01911463774740696, 0.05295571684837341, 0.030342884361743927, 0.03814920783042908, -0.03366788476705551, 0.03090745024383068, 0.09487249702215195, -0.002995486371219158, -0.012020634487271309, -0.029147809371352196, 0.09558248519897461, 0.02548893168568611, 0.0931544378399849], - "VIRT_REG_GR64":[0.02717440389096737, 
-0.026730243116617203, -0.023244258016347885, 0.04027782380580902, 0.006808254402130842, -0.027519788593053818, -0.01906559243798256, 0.027793627232313156, -0.00129543652292341, -0.03455121070146561, 0.021734628826379776, 0.035481199622154236, -0.07251942157745361, -0.025691546499729156, -0.03271827474236488, -0.13225725293159485, -0.0601421520113945, 0.09084498882293701, -0.10225717723369598, 0.004034099169075489, 0.023578351363539696, -0.041603971272706985, 0.04199974611401558, -0.014711204916238785, -0.04272732138633728, -0.12534455955028534, -0.023738788440823555, 0.005328727886080742, 0.038416482508182526, -0.026419155299663544, -0.041119154542684555, 0.00022502713545691222, -0.05204978585243225, -0.019709734246134758, -0.04102563485503197, 0.06480151414871216, 0.009224721230566502, 0.04627599939703941, 0.027821402996778488, -0.05595114827156067, 0.04526345059275627, 0.024196594953536987, 0.10446277260780334, 0.07561361789703369, -0.08028160035610199, -0.0314163975417614, 0.11944323033094406, 0.1025814488530159, -0.08457476645708084, 0.02227119728922844, -0.041679076850414276, -0.02260834351181984, 0.036674268543720245, 0.10488750785589218, 0.019218411296606064, -0.015966340899467468, -0.06852715462446213, 0.026523491367697716, -0.11090730130672455, -0.0021082640159875154, -0.048291631042957306, -0.032388005405664444, 0.015713853761553764, 0.03355225548148155, -0.06502845883369446, -0.010098783299326897, -0.09930021315813065, -0.017413528636097908, -0.055861033499240875, 0.0801810696721077, -0.03900628536939621, -0.03278445452451706, -0.0337282195687294, -0.11434067040681839, -0.04371264949440956, -0.01736009307205677, -0.05100121721625328, 0.07490750402212143, -0.014680330641567707, -0.02126181870698929, 0.018013890832662582, 0.0018135658465325832, 0.029781077057123184, -0.012477489188313484, -0.021443217992782593, 0.047576501965522766, -0.05993758141994476, -0.06040889024734497, 0.016642581671476364, 0.011624492704868317, -0.042229063808918, 
-0.007573941722512245, -0.04010608047246933, -0.006444427650421858, -0.014495199546217918, -0.04122597724199295, -0.08505907654762268, -0.004049300216138363, 0.06545045226812363, -0.04762336611747742], - "VIRT_REG_GR64_ABCD":[0.04577033221721649, -0.07758746296167374, 0.00799313560128212, -0.11011485010385513, -0.010862522758543491, 0.012709266506135464, 0.05257265642285347, -0.07354705780744553, 0.04262387007474899, 0.07554348558187485, -0.06358839571475983, 0.006669520866125822, 0.049098193645477295, 0.11183933168649673, -0.028112098574638367, 0.021986473351716995, -0.02839403599500656, -0.06199958547949791, 0.08614487200975418, -0.041216861456632614, 0.041238460689783096, 0.005937385838478804, 0.00200703926384449, -0.05337367579340935, 0.037919919937849045, -0.07485998421907425, -0.09153831005096436, -0.0554175041615963, -0.10251995176076889, -0.01289951242506504, -0.030631467700004578, 0.04197017475962639, -0.03578301519155502, 0.010593005456030369, -0.05836241692304611, 0.06809061765670776, 0.10871735960245132, -0.09833388775587082, -0.009873395785689354, -0.056898634880781174, 0.05946199968457222, 0.015534073114395142, 0.01677171140909195, -0.020233800634741783, -0.006396631710231304, -0.049332089722156525, 0.012649210169911385, 0.03756912052631378, 0.0033660116605460644, -0.09084216505289078, -0.07142844051122665, -0.0030346515122801065, 0.0019640070386230946, 0.038837920874357224, 0.011760945431888103, 0.04995080456137657, -0.06997165083885193, -0.035297296941280365, 0.01996617764234543, 0.01954355463385582, -0.0934600979089737, 0.030165065079927444, -0.007337240036576986, -0.05346155911684036, 0.0732186883687973, -0.04716489836573601, -0.06555212289094925, -0.018465254455804825, 0.051119767129421234, -0.03106619231402874, 0.0748852789402008, -0.02095886692404747, 0.006320921704173088, 0.03146332502365112, -0.08238139003515244, -0.03618254140019417, -0.014570276252925396, 0.062481846660375595, -0.0394093319773674, -0.05171547457575798, 
-0.044726233929395676, -0.01228095218539238, 0.09699232876300812, 0.07471026480197906, 0.03112417459487915, 0.022543631494045258, -0.08634103089570999, 0.059702761471271515, -0.013801504857838154, 0.004984616301953793, 0.045798566192388535, -0.03205988556146622, -0.06150995194911957, -0.02244667150080204, 0.03318532556295395, 0.03462471440434456, 0.03236381709575653, 0.0884014293551445, -0.01604369841516018, -0.05234146490693092], - "VIRT_REG_GR64_NOREX":[-0.03959479182958603, -0.06190898269414902, -0.02920372597873211, -0.09973344951868057, -0.004333901684731245, -0.08522991091012955, 0.0459987074136734, -0.057674553245306015, 0.037046968936920166, -0.05669403821229935, -0.02221340872347355, -0.062426190823316574, 0.05804889276623726, -0.02635439857840538, -0.045627325773239136, 0.03632078319787979, 0.07128578424453735, 0.07544906437397003, -0.0537678524851799, -0.04624016210436821, 0.014316501095890999, 0.05580946058034897, 0.05251356214284897, -0.08244197070598602, -0.08901460468769073, -0.07641059905290604, -0.04924754425883293, 0.05417120084166527, -0.0060508353635668755, -0.00814742036163807, -0.06154030188918114, 0.05966867506504059, -0.03231468051671982, 0.021429890766739845, 0.031103987246751785, 0.04343251883983612, -0.08997714519500732, 0.039365898817777634, 0.052908625453710556, -0.02683917060494423, -0.05547752603888512, -0.014131218194961548, 0.0016863569617271423, -0.041112788021564484, -0.010230163112282753, -0.06687774509191513, -0.006144971586763859, -0.08074352145195007, 0.04034091532230377, -0.08176303654909134, -0.004055786412209272, -0.0024839320685714483, -0.007289807312190533, 0.06915127485990524, 0.023709064349532127, 0.04671626538038254, 0.06229325756430626, 0.04707597941160202, 0.06800796836614609, -0.02885584905743599, 0.030613983049988747, -0.019083039835095406, 0.045457858592271805, 0.040770504623651505, -0.05441175401210785, -0.05712401866912842, 0.07744520157575607, -0.0756613239645958, -0.06890957802534103, -0.07997069507837296, 
0.09348486363887787, -0.04511028528213501, 0.036194607615470886, 0.040017660707235336, 0.016245214268565178, 0.023104460909962654, 0.058383163064718246, 0.0679842159152031, -0.00921112485229969, -0.10036550462245941, 0.09075804799795151, -0.059704095125198364, -0.013338442891836166, -0.005139742512255907, 0.07807526737451553, 0.06255412846803665, -0.008151572197675705, -0.0624256506562233, 0.012590888887643814, 0.03665084019303322, -0.028498578816652298, -0.01614067517220974, 0.007552243769168854, -0.007216903381049633, 0.0760180801153183, -0.04200543463230133, 0.06412865966558456, -0.05136435106396675, -0.0024792966432869434, 0.06856651604175568], - "VIRT_REG_GR64_NOREX_NOSP":[-0.0656895712018013, 0.058077458292245865, -0.006653467658907175, 0.037784356623888016, 0.07274001836776733, 0.07232078164815903, 0.07074914127588272, 0.05637859180569649, 0.04296007752418518, 0.05499762296676636, -0.01783664897084236, -0.08387365937232971, -0.01376343984156847, -0.07938199490308762, -0.027822256088256836, -0.0663403570652008, 0.036170270293951035, -0.07460261881351471, 0.08652043342590332, 0.02483147382736206, -0.07939319312572479, 0.033202506601810455, 0.0903514102101326, -0.10181311517953873, 0.060751549899578094, 0.07619930803775787, 0.05017509311437607, -0.0470910519361496, 0.07713821530342102, -0.0426195003092289, -0.04506472498178482, 0.003363420255482197, -0.0017315347213298082, 0.06264199316501617, 0.005245774984359741, -0.027923958376049995, 0.09868567436933517, 0.06738796830177307, -0.10339145362377167, 0.0020383980590850115, 0.087734155356884, 0.011040030047297478, -0.05993311479687691, -0.05790332704782486, 0.01574312523007393, 0.009771298617124557, 0.022676382213830948, -0.009197148494422436, 0.03372732177376747, 0.08404259383678436, -0.015135225839912891, -0.04693703353404999, 0.09917140752077103, 0.007134507410228252, 0.020209072157740593, -0.00027669535484164953, -0.0351635180413723, 0.03751315921545029, -0.019665181636810303, 0.028500953689217567, 
0.034186746925115585, -0.005931361112743616, 0.05645192414522171, -0.02027188241481781, -0.022675039246678352, -0.08812297880649567, -0.014896178618073463, -0.048788342624902725, 0.008708382956683636, 0.019917558878660202, -0.002275944221764803, 0.03409638628363609, 0.033304013311862946, 0.057676300406455994, 0.039842985570430756, -0.025169866159558296, 0.016520975157618523, -0.030201178044080734, -0.021718870848417282, -0.07023277878761292, -0.007528252899646759, 0.009067370556294918, -0.0460657961666584, 0.07117785513401031, -0.03609836474061012, -0.011893372051417828, -0.006047600414603949, 0.0179970171302557, 0.024480223655700684, -0.03918423503637314, 0.004897980485111475, 0.05040167644619942, 0.010113563388586044, -0.1074901670217514, -0.06277655810117722, -0.02934161201119423, -0.06922926008701324, -0.05638887360692024, 0.05314395949244499, 0.04588884115219116], - "VIRT_REG_GR64_NOSP":[0.0015277941711246967, -0.03938478231430054, -0.030811766162514687, 0.027071669697761536, 0.02127140760421753, 0.0015787228476256132, -0.07842491567134857, 0.004658385645598173, -0.05909501388669014, -0.03576778993010521, -0.07251477241516113, 0.12117832154035568, 0.04499363154172897, -0.009405314922332764, -0.01015283353626728, -0.002841090550646186, 0.0689091831445694, 0.10697457194328308, -0.09274765104055405, -0.027955353260040283, -0.0379958301782608, -0.044126156717538834, 0.04907212778925896, -0.038063473999500275, -0.003686746582388878, -0.08313410729169846, -0.045181579887866974, -0.011702840216457844, -0.006579228211194277, 0.046807315200567245, -0.045654296875, -0.03466613590717316, -0.08313826471567154, -0.06678880006074905, -0.027727074921131134, 0.036734677851200104, -0.040936414152383804, 0.05170389637351036, 0.038199927657842636, 0.02960256300866604, 0.0355701707303524, -0.02052776888012886, 0.06218089163303375, 0.10570456087589264, -0.036479029804468155, -0.008999336510896683, -0.031860992312431335, 0.07250168174505234, -0.061084795743227005, 
-0.057996805757284164, -0.010533110238611698, -0.018169214949011803, 0.017261315137147903, 0.10023517906665802, -0.044131457805633545, -0.07618662714958191, -0.09124933928251266, 0.01819406822323799, -0.05906827375292778, 0.04295642301440239, -0.03197735920548439, 0.03641442954540253, 0.005168464966118336, -0.00010972691961796954, -0.0829579159617424, -0.014677388593554497, -0.08750011026859283, -0.04695136100053787, -0.07696729153394699, -0.00718996487557888, 0.018294518813490868, -0.014321570284664631, -0.04416860267519951, -0.0890057235956192, -0.014466283842921257, 0.02831638976931572, -0.04845190420746803, 0.08228176832199097, 0.03420877829194069, 0.056510377675294876, 0.037403274327516556, 0.04364967346191406, 0.08903267979621887, -0.016827082261443138, -0.0682789757847786, 0.06286796927452087, -0.0958203598856926, 0.018489282578229904, 0.02886355295777321, 0.028006011620163918, 0.039986785501241684, -0.04771937429904938, -0.004648604430258274, 0.033939141780138016, -0.027820419520139694, -0.026187442243099213, -0.07972361892461777, 0.006323353853076696, 0.016448041424155235, -0.01961681991815567], - "VIRT_REG_GR64_NOSP_and_GR64_TC":[0.08079065382480621, -0.05147358775138855, -0.08338657021522522, 0.06757336109876633, -0.015237463638186455, 0.026806311681866646, 0.07564966380596161, -0.037159934639930725, -0.02222878858447075, -0.04553138092160225, -0.006632891017943621, 0.001604291144758463, 0.043711669743061066, 0.0710049569606781, -0.08854726701974869, -0.03142566233873367, -0.0865127220749855, 0.08521236479282379, 0.039203498512506485, 0.04737624153494835, 0.02893459051847458, 0.004120660945773125, 0.03552098199725151, -0.0010448878165334463, 0.04423774778842926, 0.03258584439754486, 0.03433830663561821, -0.019990455359220505, -0.03263172507286072, 0.09782663732767105, -0.00702365068718791, -0.06544602662324905, 0.013447105884552002, 0.04603038728237152, 0.029931804165244102, 0.0988783910870552, -0.062023941427469254, -0.0070026409812271595, 
0.032557111233472824, -0.08212000876665115, 0.03199682757258415, 0.020828546956181526, 0.07071725279092789, -0.018812179565429688, -0.0184739138931036, -0.06008931249380112, 0.01504000648856163, -0.019235603511333466, 0.014653048478066921, -0.009083813987672329, 0.03171474114060402, 0.019499456509947777, 0.05263463407754898, 0.10554639250040054, -0.02759619802236557, -0.00156494346447289, -0.03898271545767784, 0.06027846410870552, -0.061001915484666824, 0.039365388453006744, -0.06546281278133392, 0.0006352368509396911, 0.0500405877828598, -0.03232716768980026, -0.010176514275372028, 0.002549059921875596, 0.0666508674621582, -0.037290267646312714, -0.028836704790592194, 0.06271649152040482, -0.016647985205054283, 0.013602355495095253, 0.020110899582505226, 0.011730309575796127, -0.10071564465761185, -0.06239647418260574, -0.09507977962493896, -0.09190725535154343, -0.08861985802650452, -0.0006123466300778091, 0.0951915979385376, -0.035364676266908646, -0.04007220268249512, 0.08415472507476807, 0.0006664254469797015, 0.05864431709051132, 0.01460045762360096, -0.09507087618112564, 0.024228032678365707, 0.04208158329129219, 0.006106846500188112, 0.09294755011796951, 0.06157369166612625, 0.0826527327299118, -0.058974966406822205, -0.09958664327859879, 0.06913749873638153, -0.08108915388584137, 0.07425157725811005, 0.04784728214144707], - "VIRT_REG_GR64_TC":[-0.0944172665476799, 0.040403831750154495, -0.017597073689103127, 0.04766053333878517, -0.03104357235133648, 0.025751160457730293, 0.036779265850782394, -0.0235747080296278, 0.032111138105392456, 0.009872193448245525, -0.01596468687057495, 0.05234881862998009, -0.047335200011730194, 0.005157034378498793, -0.02132921665906906, -0.0544377863407135, 0.057515472173690796, -0.006743279751390219, -0.01474941335618496, -0.0990658849477768, 0.022418741136789322, -0.007098495960235596, 0.046933863312006, 0.1002131924033165, 0.01583809033036232, 0.03995800018310547, -0.017743254080414772, -0.01684877835214138, 
0.06543229520320892, 0.04597911611199379, 0.05365373566746712, -0.008774830959737301, -0.01341968309134245, -0.004754040390253067, 0.04739849269390106, 0.032378777861595154, -0.0020728895906358957, 0.03502136841416359, 0.05946416035294533, -0.06190952658653259, 0.01910495012998581, -0.023678753525018692, 0.012653682380914688, -0.06766874343156815, -0.0729866623878479, 0.0757005363702774, -0.027033904567360878, -0.06776778399944305, -0.010131776332855225, -0.06334701925516129, -0.04702980816364288, 0.06837917864322662, 0.002726735547184944, 0.04345812648534775, 0.04288078844547272, -0.06921732425689697, -0.07625382393598557, 0.037991974502801895, -0.04257906600832939, 0.06338586658239365, 0.05315309390425682, -0.02785014547407627, 0.04054750129580498, 0.06967299431562424, -0.07271680235862732, 0.0032969408202916384, -0.08254148811101913, 0.07269596308469772, -0.01827111467719078, 0.034775473177433014, 0.010106234811246395, 0.0389409065246582, 0.042805008590221405, -0.03822058066725731, 0.0668339803814888, -0.005216705612838268, -0.00022202919353730977, -0.0221820380538702, -0.027401722967624664, -0.045061662793159485, -0.05296671763062477, -0.0190189890563488, -0.002744461875408888, -0.04073096439242363, -0.06974441558122635, 0.05868958309292793, -0.06907399743795395, -0.026619713753461838, 0.015318086370825768, 0.035948701202869415, -0.08301021158695221, 0.03955607861280441, 0.028369972482323647, 0.0202812347561121, -0.12075140327215195, -0.039504438638687134, -0.03826067969202995, 0.01607581228017807, 0.02135113812983036, -0.08897850662469864], - "VIRT_REG_GR64_TC_with_sub_8bit":[0.00805664248764515, 0.06228634715080261, -0.005148644559085369, -0.025605352595448494, -0.04853198677301407, -0.018169978633522987, 0.008530518971383572, -0.1050964742898941, -0.08428415656089783, -0.014802628196775913, 0.05918573588132858, 0.07529161125421524, 0.09815273433923721, -0.014188972301781178, 0.06676790118217468, 0.09496084600687027, -0.03843621164560318, 
-0.00740150036290288, -0.11988909542560577, -0.01781499572098255, -0.03719411790370941, -0.07447166740894318, 0.005513608455657959, -0.014381160028278828, 0.036786310374736786, -0.04839075356721878, -0.009440913796424866, 0.03984222561120987, -0.08096668124198914, 0.026751000434160233, 0.06400448083877563, 0.07998895645141602, 2.295125523232855e-05, 0.0266779325902462, -0.0030931613873690367, 0.05236855521798134, -0.010479471646249294, -0.011119752191007137, -0.06124376133084297, -0.019449712708592415, 0.03448517248034477, -0.04095051810145378, 0.01377212442457676, 0.09643338620662689, 0.021325431764125824, 0.06029453128576279, 0.048866767436265945, -0.03436344116926193, -0.043422505259513855, 0.03822150453925133, 0.004718889016658068, -0.04090931639075279, -0.04219569265842438, 0.019032739102840424, 0.06111171841621399, 0.04305591061711311, -0.0379939004778862, -0.03224434703588486, -0.06517905741930008, 0.002272483194246888, 0.09273418039083481, -0.028145847842097282, 0.01824336126446724, 0.00936606340110302, -0.07281909137964249, -0.028650810942053795, -0.060721538960933685, -0.09477518498897552, -0.0014060320099815726, 0.06919887661933899, -0.03463669493794441, 0.0026504716370254755, -0.0653621107339859, -0.02800566703081131, -0.02503957599401474, -0.060285311192274094, 0.014794053509831429, -0.08424058556556702, 0.0482206828892231, -0.07467620074748993, -0.09909844398498535, -0.06888734549283981, -0.0014173799427226186, -0.09022543579339981, 0.06461413204669952, 0.024526789784431458, -0.07400602847337723, -0.008816084824502468, 0.025513656437397003, 0.047476526349782944, -0.05981749668717384, 0.08338218182325363, 0.02657591737806797, 0.03547860309481621, -0.043622229248285294, 0.10129662603139877, 0.08802521973848343, -0.09759330749511719, 0.025680232793092728, 0.05964493378996849], - "VIRT_REG_GR64_with_sub_16bit_in_GR16_NOREX":[-0.03117012232542038, -0.02872271090745926, -0.039712607860565186, 0.03738812729716301, 0.030099159106612206, 
0.00013636364019475877, -0.019107641652226448, -0.04186702147126198, -0.053099144250154495, -0.020432034507393837, -0.0004185919533483684, 0.010934959165751934, 0.036054231226444244, 0.03788067027926445, 0.05227302014827728, -0.034505825489759445, -0.08298061788082123, 0.0399160161614418, 0.03668724000453949, 0.014606554992496967, -0.0071771652437746525, 0.059049926698207855, -0.06330917030572891, 0.007379058748483658, -0.0750177726149559, -0.0423760749399662, -0.019386067986488342, -0.018436923623085022, -0.015116279944777489, 0.023602722212672234, 0.0533282607793808, -0.026401247829198837, 0.023750485852360725, -0.027648568153381348, -0.016443056985735893, 0.04291580244898796, -0.04391908273100853, 0.05113501846790314, -0.03743087872862816, 0.056367188692092896, 0.048130668699741364, -0.0230261143296957, 0.03358393907546997, -0.030188169330358505, 0.08421863615512848, 0.0033821314573287964, 0.03151029348373413, -0.042818162590265274, 0.04007953777909279, -0.0050337472930550575, 0.03335743024945259, -0.026563530787825584, 0.016440672799944878, -0.04272226244211197, -0.07304228097200394, 0.024836458265781403, -0.016342775896191597, -0.055494848638772964, -0.05826134234666824, 0.027478834614157677, 0.025981346145272255, -0.04745938256382942, 0.013695796020328999, -0.027888784185051918, 0.03769542649388313, -0.024486247450113297, 0.04720773920416832, -0.012697651982307434, -0.03559652715921402, 0.012948199175298214, -0.025600459426641464, 0.014954420737922192, -0.06651762872934341, 0.04277091473340988, -0.08291683346033096, 0.016881149262189865, 0.04145864024758339, -0.04162050038576126, -0.03363965451717377, -0.05018439516425133, 0.06321889907121658, -0.00871780700981617, 0.06867428869009018, 0.057975344359874725, 0.009704249911010265, 0.049075234681367874, -0.06111253425478935, 0.027943406254053116, 0.03725599870085716, 0.032480716705322266, -0.01960119605064392, -0.0295172780752182, 0.014026675373315811, 0.056797921657562256, -0.031707022339105606, 
0.0010152219329029322, -0.023705823346972466, -0.07695567607879639, 0.017504720017313957, -0.0020094760693609715], - "VIRT_REG_GR64_with_sub_8bit":[-0.011493992060422897, -0.027181852608919144, 0.022013556212186813, 0.05687474459409714, -0.03289574757218361, -0.04803529754281044, -0.04204253479838371, 0.044671084731817245, -0.0849028080701828, -0.09561576694250107, 0.03596775606274605, 0.027156801894307137, 0.05034027621150017, -0.006308000069111586, 0.012393618933856487, -0.048590339720249176, -0.049129705876111984, 0.059305012226104736, -0.10330235958099365, 0.00738809397444129, 0.03855152800679207, -0.03220852091908455, 0.05221837759017944, -0.01274650078266859, 0.024303985759615898, -0.05925533175468445, -0.015623844228684902, -0.025864524766802788, 0.009918035939335823, 0.004779431037604809, -0.02866589091718197, 0.006512579973787069, -0.037251196801662445, 0.005028596147894859, -0.011677909642457962, 0.051886074244976044, -0.03552602231502533, 0.011968757025897503, 0.00829426757991314, -0.06981230527162552, -0.029781555756926537, -0.012621275149285793, 0.08595969527959824, 0.08630531281232834, 0.10018875449895859, -0.054863955825567245, -0.044519901275634766, 0.0893385037779808, 0.04004377871751785, 0.003711731405928731, -0.021447300910949707, -0.08500636368989944, 0.0037281641270965338, 0.14561010897159576, 0.03993009030818939, 0.07621612399816513, 0.020513180643320084, 0.004926605150103569, -0.035578932613134384, 0.06101486086845398, -0.08422145247459412, -0.03511432558298111, 0.01537742093205452, -0.010146304965019226, -0.05133780837059021, -0.010472903028130531, -0.09726933389902115, -0.010570867918431759, -0.09348491579294205, 0.002129049738869071, -0.01265127956867218, 0.03504374623298645, -0.008679943159222603, -0.002507386729121208, -0.06586045026779175, -0.04775359109044075, -0.042809367179870605, 0.08359787613153458, -0.0230431966483593, -0.015440763905644417, 0.0195400882512331, -0.0186530202627182, -0.03176320344209671, -0.019522372633218765, 
-0.02984560839831829, 0.024256182834506035, -0.07656785100698471, 0.03944750130176544, 0.016559945419430733, 0.007124909665435553, 0.08061631768941879, 0.08561833202838898, -0.018525447696447372, -0.0019649232272058725, -0.018469924107193947, -0.012311050668358803, -0.08448101580142975, 0.060216110199689865, 0.06368701905012131, -0.07110093533992767], - "VIRT_REG_GR8":[0.02255251444876194, 0.012649326585233212, 0.05363747105002403, -0.006129346787929535, 0.027027001604437828, 0.03703385218977928, -0.045294541865587234, -0.02489621751010418, 0.026587747037410736, -0.06228360906243324, 0.01547946222126484, 0.03494448587298393, 0.08276952058076859, -0.03470698744058609, 0.0036826131399720907, 0.04216131567955017, -0.04518325626850128, 0.09584730118513107, -0.09126991778612137, -0.11293632537126541, 0.0141398124396801, -0.05086163431406021, 0.0421922467648983, -0.0001364851341350004, 0.05821910500526428, -0.04154132679104805, 0.036521218717098236, -0.016718950122594833, 0.0773339569568634, 0.05134757608175278, -0.03728386387228966, -0.014684299007058144, 0.016949277371168137, 0.025767508894205093, -0.01573120802640915, 0.0343811996281147, 0.008209497667849064, 0.0011038129450753331, -0.06688684970140457, -0.08167136460542679, 0.03875276446342468, 0.08301592618227005, 0.023012684658169746, 0.07135005295276642, 0.008461466059088707, 0.004998552612960339, 0.02622731775045395, -0.09479465335607529, 0.014987453818321228, -0.008574756793677807, -0.008050303906202316, -0.005560623947530985, 0.04616820812225342, 0.11537269502878189, 0.032199542969465256, 0.05507092550396919, -0.053164780139923096, 0.012255114503204823, -0.01981479674577713, 0.06012535095214844, 0.043957680463790894, 0.02384384348988533, 0.04837791621685028, 0.04945961385965347, -0.1063770279288292, -0.07354240119457245, -0.08922741562128067, -0.026019031181931496, -0.08768662065267563, 0.09241457283496857, 0.03253300115466118, -0.018267929553985596, -0.04406850412487984, -0.05577726289629936, 
-0.05304105579853058, 0.016035545617341995, 0.05610279366374016, 0.06247573718428612, -0.019430609419941902, -0.017088554799556732, -0.022114543244242668, 0.07442588359117508, -0.017668865621089935, -0.02403153106570244, 0.006919574458152056, 0.05879344418644905, -0.0885634645819664, -0.016336753964424133, -0.024662213400006294, 0.029266972094774246, -0.04889025166630745, 0.042460259050130844, -0.013102580793201923, 0.023992935195565224, 0.024768078699707985, 0.047551900148391724, -0.02243787795305252, 0.05929713696241379, 0.03110451251268387, -0.00550821190699935], - "VIRT_REG_RFP80":[-0.04414765536785126, 0.05147779360413551, -0.035608600825071335, -0.03939598798751831, 0.0430026613175869, -0.03331028297543526, 0.015591064468026161, 0.01892651617527008, -0.011428372003138065, -0.06980786472558975, 0.06445881724357605, 0.1036338210105896, 0.01164929661899805, -0.07599718868732452, 0.022036561742424965, 0.10396245121955872, -0.041171155869960785, -0.07264886051416397, 0.00032837275648489594, 0.02848120965063572, -0.031889040023088455, 0.023848745971918106, -0.02298046089708805, -0.05559201166033745, 0.026687605306506157, 0.0565699003636837, -0.0134252505376935, 0.05494402348995209, -0.0584089457988739, 0.05422470346093178, -0.024360226467251778, 0.03570455685257912, 0.013681530021131039, -0.006910417694598436, 0.011886067688465118, 0.07619262486696243, 0.08147607743740082, 0.05824091285467148, 0.001224246108904481, -0.030463339760899544, -0.023527851328253746, 0.03078501485288143, -0.02225799672305584, -0.058049511164426804, 0.015403151512145996, 0.07900431007146835, 0.025944147258996964, 0.021455328911542892, 0.023985104635357857, -0.0327906534075737, 0.04195002466440201, -0.10313323140144348, -0.023333510383963585, -0.010316243395209312, -0.02042137086391449, 0.07474000751972198, 0.02313513681292534, -0.0030733307357877493, 0.06138097122311592, 0.005197131074965, -0.03222955763339996, 0.005364845506846905, -0.05313501134514809, 0.0013082564109936357, 
0.025044983252882957, 0.0349799208343029, 0.09704083949327469, -0.017403649166226387, -0.03375721350312233, 0.05970870703458786, -0.021679691970348358, -0.04719642922282219, 0.024217652156949043, -0.06130526587367058, 0.004813425708562136, 0.07473690062761307, -0.039600174874067307, -0.009295261465013027, 0.05440402403473854, 0.04785943776369095, -0.04006686061620712, -0.020133933052420616, 0.00989031046628952, -0.054447200149297714, 0.06291327625513077, -0.01196430902928114, 0.0841275230050087, -0.05557875707745552, -0.0813804343342781, -0.0746457576751709, -0.024255990982055664, -0.048101916909217834, -0.014132879674434662, -0.013147399760782719, -0.009715595282614231, 0.08717820793390274, -0.04318689927458763, -0.0311901792883873, -0.017253845930099487, 0.005144816357642412], - "VIRT_REG_VR128":[0.08292517066001892, 0.053138989955186844, 0.0019234063802286983, -0.030035940930247307, 0.0821828693151474, -0.0540342852473259, 0.06449387222528458, -0.03985493257641792, 0.026820721104741096, 0.0352952741086483, -0.1056072935461998, 0.054804764688014984, 0.01685425080358982, 0.05867069214582443, 0.11665259301662445, -0.07655566930770874, 0.021201618015766144, 0.00927705504000187, -0.04723019897937775, 0.016582123935222626, -0.01160470675677061, -0.013075411319732666, 0.01054342370480299, -0.05403316020965576, 0.033609066158533096, -0.07971179485321045, 0.1005927175283432, -0.020655132830142975, -0.0036442605778574944, 0.018269486725330353, 0.036334097385406494, -0.06517180055379868, -0.028530113399028778, -0.03768114373087883, 0.10582506656646729, 0.011199450120329857, -0.06707775592803955, 0.02332702837884426, -0.014528930187225342, -0.09369251132011414, 0.069722481071949, 0.031001657247543335, 0.08032777905464172, -0.060744334012269974, 0.015131807886064053, 0.01935953088104725, -0.087028868496418, 0.041773099452257156, 0.0381581112742424, -0.07518653571605682, 0.021307995542883873, -0.07350508868694305, -0.04699733853340149, -0.007377162110060453, 
0.07836157828569412, 0.016066696494817734, -0.02160775288939476, -0.030519334599375725, -0.09255059063434601, 0.03597188740968704, -0.11260625720024109, -0.08602424710988998, 0.058293748646974564, -0.034749604761600494, 0.005541469436138868, -0.07924741506576538, -0.024103455245494843, 0.06047135218977928, 0.026729481294751167, 0.03493977710604668, -0.07453227788209915, -0.01716521382331848, 0.008985077030956745, -0.08075122535228729, 0.03353623300790787, -0.08125714957714081, 0.04245763644576073, 0.06520543247461319, 0.020550349727272987, -0.003161275526508689, -0.03491697832942009, -0.005496494937688112, 0.09021904319524765, -0.057418785989284515, 0.03494826331734657, -0.052578359842300415, -0.044952504336833954, 0.11770184338092804, -0.048565153032541275, -0.03815764561295509, 0.06020108237862587, -0.09397949278354645, 0.03820547088980675, 0.08039405196905136, 0.014751153998076916, 0.006572262849658728, 0.05658692866563797, 0.05043925344944, -0.0060436660423874855, -0.12018798291683197], - "VIRT_REG_VR256":[0.032775089144706726, 0.029240285977721214, 0.01821955479681492, 0.023595772683620453, -0.02587016113102436, -0.12190376222133636, 0.09720813482999802, 0.005780891049653292, -0.0581410676240921, 0.04817686229944229, -0.04627984017133713, 0.03618951886892319, -0.10393846780061722, 0.04380590096116066, 0.030101926997303963, -0.021811308339238167, 0.0012455569813027978, 0.06209835410118103, -0.08859474956989288, 0.0671553835272789, -0.006448917090892792, 0.0169842429459095, 0.031113164499402046, -0.07417412847280502, 0.05549546331167221, -0.013042094185948372, 0.0948401540517807, -0.07335975021123886, -0.03987044095993042, -0.005343804135918617, -0.08741248399019241, -0.08009110391139984, 0.005667346995323896, 0.03745159134268761, 0.019986214116215706, -0.03723142296075821, -0.0037649653386324644, 0.005682446528226137, 0.0659727230668068, -0.002658356446772814, 0.07049102336168289, -0.01944110542535782, -0.014278342947363853, 0.04189611226320267, 
0.0312303826212883, -0.046760618686676025, 0.040438465774059296, 0.054074693471193314, 0.07479880005121231, -0.016405146569013596, 0.027125591412186623, -0.04216836765408516, 0.0011189498472958803, -0.01471384521573782, -0.010250975377857685, -0.006412460468709469, -0.12170380353927612, 0.015495882369577885, -0.054699406027793884, 0.05955614894628525, 0.06753991544246674, -0.03688138723373413, 0.049010518938302994, -0.07614680379629135, 0.06504888087511063, -0.014145595952868462, 0.02210555598139763, 0.023598313331604004, 0.00511248828843236, 0.013318972662091255, -0.11605404317378998, -0.032067783176898956, -0.05010659247636795, -0.023693162947893143, 0.06650379300117493, -0.026386691257357597, 0.06052805855870247, 0.0515507273375988, 0.033960308879613876, -0.06421340256929398, -0.09355985373258591, -0.0658700093626976, 0.10278744995594025, -0.10271084308624268, -0.012089421041309834, -0.04169749841094017, -0.07112454622983932, -0.032573599368333817, -0.0003141233173664659, 0.017007946968078613, 0.03622191399335861, 0.05829676240682602, 0.06261610984802246, 0.005667738616466522, 0.009631159715354443, 0.022852277383208275, 0.057013869285583496, -0.05015721917152405, 0.027599012479186058, -0.08637165278196335], "VMASKMOVPDYmr":[-0.04878474771976471, 0.009688055142760277, 0.05428608879446983, -0.030850162729620934, 0.03008297272026539, 0.03831377625465393, -0.023454757407307625, 0.061062078922986984, -0.07177434861660004, 0.003681673901155591, 0.040161218494176865, -0.009652352891862392, 0.07261710613965988, -0.010966332629323006, -0.013221205212175846, -0.03301544487476349, 0.04829031974077225, -0.08083753287792206, 0.030231673270463943, -0.02659734897315502, -0.036777157336473465, 0.06681652367115021, 0.01175805926322937, 0.06305940449237823, -0.019296150654554367, 0.02796877548098564, -0.029999401420354843, -0.0198240764439106, -0.04471949115395546, -0.06781838089227676, 0.024380704388022423, 0.03754236921668053, 0.06767786294221878, 0.04803696274757385, 
0.046649131923913956, 0.04538867995142937, -0.028629129752516747, 0.0127564687281847, 0.004995361436158419, -0.08728974312543869, 0.029057662934064865, 0.07067801058292389, 0.0007887053652666509, 0.019237162545323372, -0.04447153955698013, -0.10583364218473434, 0.08983936905860901, 0.015038984827697277, -0.034384895116090775, -0.055098336189985275, -0.07670909911394119, 0.002524072304368019, 0.10086455941200256, 0.022610867395997047, 0.05591642111539841, -0.07907918840646744, -0.04253252223134041, 0.05387851223349571, -0.034182146191596985, -0.08478306978940964, -0.039358172565698624, 0.05872701108455658, 0.0004980096709914505, -0.054017916321754456, -0.05543661117553711, -0.05234605073928833, -0.01648441143333912, -0.039598412811756134, 0.014009279198944569, 0.07753992825746536, -0.024791967123746872, 0.0015941763995215297, -0.08564147353172302, 0.015439499169588089, 0.04659571126103401, 0.042471837252378464, 0.005456998012959957, 0.015990061685442924, -0.02272135764360428, -0.03891618177294731, -0.0077924951910972595, -0.05113787576556206, 0.040118955075740814, -0.043831776827573776, 0.05283576622605324, 0.09104584157466888, 0.015506122261285782, -0.028880758211016655, -0.0025508899707347155, 0.08238258212804794, -0.011219828389585018, 0.0496247261762619, -0.044287387281656265, 0.050674524158239365, 0.02936738170683384, -0.017218898981809616, 0.07722929865121841, 0.04578819498419762, -0.031120644882321358, -0.022032534703612328], "VMASKMOVPSYmr":[0.020578626543283463, -0.004085692577064037, 0.07696651667356491, 0.028803450986742973, -0.006955036427825689, -0.018540993332862854, 0.0719260424375534, 0.09322775900363922, 0.05095001682639122, -0.01811334490776062, 0.01627892442047596, 0.050088733434677124, -0.06736274808645248, 0.025077303871512413, 0.06022811681032181, -0.09305489808320999, -0.09338469058275223, -0.0525103323161602, -0.06159364432096481, 0.030921749770641327, 0.06632588058710098, 0.031169326975941658, 0.016549210995435715, -0.06410345435142517, 
0.034944821149110794, 0.01632581278681755, 0.06805131584405899, -0.004622941836714745, -0.02994105964899063, 0.025459013879299164, 0.020487098023295403, 0.06677251309156418, -0.046148937195539474, -0.05847230181097984, -0.0662517175078392, -0.006552667822688818, 0.05338975414633751, -0.07456435263156891, -0.05682503432035446, -0.0720917284488678, -0.08354304730892181, 0.057539310306310654, -0.0984572172164917, -0.015717046335339546, -0.04905203357338905, -0.016580646857619286, 0.030063051730394363, -0.04245767742395401, -0.019089849665760994, 0.037014883011579514, -0.03125334531068802, -0.02194075658917427, 0.057924628257751465, 0.053156934678554535, 0.03154401481151581, -0.03698640316724777, -0.047283731400966644, -0.07787752151489258, -0.09294760227203369, 0.008879968896508217, -0.039479922503232956, 0.06407082825899124, 0.021868228912353516, 0.02621234394609928, -0.05872492864727974, -0.07943505048751831, 0.024682780727744102, 0.014713538810610771, 0.02206231839954853, -0.0664556622505188, -0.08985312283039093, -0.028045928105711937, 0.022865260019898415, -0.03564520925283432, 0.06292934715747833, 0.009946631267666817, 0.031550049781799316, -0.08577742427587509, 0.047102898359298706, -0.07018786668777466, -0.10670997202396393, 0.0016501775244250894, -0.08505392074584961, 0.00861909706145525, -0.06370823830366135, 0.03423767164349556, 0.03173772618174553, 0.019602738320827484, 0.021573755890130997, 0.02385428547859192, -0.01468846295028925, 0.023825718089938164, -0.05538937821984291, 0.05968264490365982, 0.08997872471809387, -0.006320557557046413, 0.012793052941560745, -0.10326020419597626, -0.015349009074270725, 0.006139614153653383], "VMASKMOVPSYrm":[-0.09999474138021469, -0.05461611971259117, 0.06111544370651245, 0.009340068325400352, 0.05158305540680885, 0.018409717828035355, 0.03258055821061134, -0.0017857305938377976, 0.041260261088609695, -0.04183795303106308, -0.04711655154824257, 0.007005605846643448, 0.017177876085042953, 0.011972760781645775, 
-0.058734532445669174, 0.022736912593245506, -0.10794606059789658, 0.029367392882704735, -0.012645614333450794, -0.09590506553649902, -0.07090207934379578, -0.05850019305944443, -0.018024247139692307, -0.0036007456947118044, -0.06459654122591019, 0.009839186444878578, 0.04846305027604103, -0.11106285452842712, 0.029033005237579346, 0.10009876638650894, 0.012796668335795403, -0.0073439814150333405, -0.08754748106002808, -0.037603843957185745, -0.015900349244475365, -0.007158457301557064, 0.03420218825340271, 0.027995899319648743, -0.07699259370565414, 0.042778756469488144, 0.04648644104599953, -0.04391217231750488, -0.018405593931674957, -0.01280362717807293, 0.08530068397521973, -0.03674551844596863, 0.06248623505234718, 0.0038591010961681604, -0.07031620293855667, -0.01702764257788658, 0.005379523150622845, -0.029414091259241104, 0.00011999297566944733, 0.058016858994960785, 0.10091454535722733, 0.07112561911344528, -0.07445680350065231, -0.08252609521150589, -0.05458306148648262, 0.0828995481133461, 0.030287114903330803, 0.08512170612812042, -0.0745752677321434, 0.011145705357193947, 0.07730960845947266, 0.06756677478551865, 0.10192125290632248, -0.015338120050728321, 0.025173967704176903, -0.017697714269161224, 0.00455897580832243, -0.01002852339297533, -0.09001599997282028, 0.06024448946118355, 0.01357717253267765, -0.04349803552031517, 0.026919689029455185, 0.07871785014867783, 0.06163106486201286, -0.02904645912349224, 0.05042176693677902, 0.019180594012141228, -0.029065869748592377, -0.02645217627286911, -0.04180121049284935, -0.01644887775182724, 0.005278781522065401, 0.021325504407286644, 0.0710480809211731, -0.02405066229403019, 0.06883849203586578, -0.08493685722351074, -0.0180019810795784, 0.10276532918214798, -0.04697193205356598, -0.0004998040967620909, 0.014400942251086235, 0.07172509282827377, 0.027445673942565918, 0.04722077399492264], @@ -673,5 +642,42 @@ "V_SETALLONES":[0.011805560439825058, 0.005605545360594988, 0.019577916711568832, 
-0.007038246374577284, -0.013101942837238312, -0.060087915509939194, 0.06600171327590942, 0.1127510741353035, 0.03251935541629791, -0.08513955771923065, -0.1272188425064087, -0.05743984133005142, 0.03415455296635628, -0.01813715696334839, 0.08123213797807693, -0.02604430541396141, 0.004977638833224773, -0.05056260898709297, 0.0759609192609787, -0.03905864432454109, -0.029284782707691193, -0.0773778036236763, -0.06391929090023041, 0.03013690747320652, 0.025567403063178062, -0.04096659645438194, -0.013911372050642967, 0.03076753579080105, 0.09287972748279572, 0.06516721844673157, 0.013303481042385101, -0.05148301273584366, 0.013247961178421974, -0.02087739109992981, -0.06532798707485199, -0.07080436497926712, 0.03797996789216995, -0.05954182893037796, -0.006158157251775265, -0.039611611515283585, 0.016250262036919594, -0.009441757574677467, -0.009183786809444427, 0.16159473359584808, 0.08712765574455261, -0.022884182631969452, -0.03575573116540909, -0.03199240192770958, -0.03306444734334946, -0.003918874077498913, 0.062194518744945526, 0.015179269947111607, -0.027334710583090782, -0.058873455971479416, 0.128275528550148, -0.0292880367487669, -0.07747887820005417, 0.1131230816245079, 0.02434738725423813, -0.025987306609749794, 0.006977062206715345, 0.005061171483248472, 0.010551988147199154, -0.011694980785250664, -0.04222672060132027, 0.0018857514951378107, -0.09771532565355301, 0.005980918649584055, -0.021874738857150078, -0.03269551321864128, -0.0660959854722023, -0.03511122986674309, -0.012204808183014393, -0.010394910350441933, 0.05620425567030907, -0.07928325980901718, 0.0231300238519907, -0.018796175718307495, -0.059483520686626434, -0.06498315185308456, -0.002720780670642853, 0.017449399456381798, -0.07902888208627701, -0.09885134547948837, 0.013462111353874207, 0.0991656631231308, 0.03312922269105911, -0.006249894388020039, 0.005173753947019577, -0.06332565099000931, -0.06398826092481613, -0.03855561092495918, 0.049685269594192505, 0.016197331249713898, 
-0.006844596937298775, -0.05894636735320091, 0.026065604761242867, -0.023921040818095207, 0.0833858922123909, 0.04180749133229256], "XCHG":[0.03013892099261284, -0.005918541457504034, -0.003877029987052083, -0.01153622567653656, 0.07044235616922379, 0.0020885420963168144, -0.04268760234117508, 0.07963797450065613, 0.0896378755569458, -0.03346250206232071, -0.026062551885843277, 0.07721738517284393, 0.08893758058547974, 0.0798523873090744, -0.025333784520626068, -0.01930663175880909, -0.012997916899621487, -0.051225848495960236, -0.0299966000020504, -0.032841041684150696, -0.06343690305948257, -0.016547048464417458, 0.034530773758888245, 0.057199425995349884, 0.0693645030260086, 0.04208416864275932, -0.028830133378505707, 0.08431533724069595, -0.06464798003435135, 0.0009512414690107107, 0.042868468910455704, -0.031348757445812225, -0.01816270686686039, 0.05597987025976181, -0.017707090824842453, -0.03889893740415573, -0.052769940346479416, 0.012921033427119255, -0.029488561674952507, -0.012502696365118027, 0.05398940294981003, -0.032147347927093506, -0.005250571761280298, -0.014250441454350948, 0.08205590397119522, 0.049281857907772064, -0.07257362455129623, -0.0003973407146986574, -0.00821124017238617, 0.10007432103157043, 0.054469816386699677, -0.05644146353006363, 0.013105852529406548, -0.08262810856103897, -0.02594495750963688, 0.007682343479245901, -0.011262120679020882, -0.007376475725322962, -0.05011703073978424, -0.06952987611293793, -0.033738043159246445, 0.01750120520591736, -0.026767224073410034, -0.04718783125281334, 0.002559647196903825, 0.01700885407626629, -0.07193762063980103, 0.07015261799097061, 0.0034866048954427242, -0.08257746696472168, -0.07703307271003723, 0.006709580775350332, 0.06423933804035187, 0.024792056530714035, -0.008637255057692528, 0.0364011712372303, 0.035330090671777725, -0.060980167239904404, 0.026977067813277245, -0.02813805267214775, -0.02690977416932583, 0.05637027323246002, 0.008040377870202065, -0.03371180593967438, 
-0.06654872000217438, -0.030922764912247658, -0.07050447911024094, 0.047597192227840424, 0.047301240265369415, 0.04565070942044258, -0.0005885852151550353, -0.01970672234892845, -0.013277091085910797, 0.03462797775864601, -0.050644565373659134, -6.830461643403396e-05, -0.0032834408339112997, -0.09096988290548325, -0.0431605726480484, 0.004180085379630327], "XOR":[0.05397406592965126, 0.030059566721320152, -0.008174624294042587, -0.015902524814009666, -0.05867229402065277, 0.10023067146539688, 0.039013586938381195, -0.0062194764614105225, 0.0027951474767178297, -0.12871405482292175, 0.006182669661939144, -0.03362947702407837, 0.03972288593649864, -0.0761077031493187, 0.07198456674814224, 0.06330277770757675, -0.020690103992819786, 0.04084693267941475, -0.029953323304653168, -0.1037738174200058, 0.058683767914772034, -0.09326515346765518, -0.030509043484926224, 0.08620086312294006, -0.028335779905319214, 0.0025649559684097767, 0.02293877862393856, 0.06309233605861664, 0.05537085980176926, 0.008650199510157108, 0.08450134843587875, 0.006163342390209436, 0.08676894754171371, 0.00373055599629879, -0.0536164715886116, 0.017478466033935547, -0.02005663886666298, -0.009954672306776047, 0.0935724526643753, -0.013202485628426075, 0.019175032153725624, 0.047811202704906464, -0.010279017500579357, 0.08613553643226624, 0.030951783061027527, -0.007498149760067463, 0.02222890406847, 0.022576699033379555, -0.037464242428541183, -0.05039561539888382, -0.05145428702235222, 0.05291113257408142, -0.04549814388155937, 0.07552238553762436, 0.04320567473769188, 0.08343681693077087, -0.03850278630852699, -0.01834949105978012, 0.047886237502098083, 0.00965320598334074, 0.014898041263222694, -0.06947735697031021, -0.002480468712747097, 0.033667247742414474, -0.057668499648571014, 0.038462892174720764, -0.04644528403878212, -0.06664751470088959, -0.048734813928604126, 0.04303475841879845, 0.027636554092168808, 0.024116700515151024, -0.003788548056036234, -0.0088395019993186, 
-0.04236738011240959, -0.02894027903676033, -0.135579451918602, -0.032144784927368164, -0.11316774785518646, -0.0039872839115560055, 0.07162772864103317, 0.03945969045162201, 0.007661669049412012, 0.04564569517970085, 0.023007070645689964, 0.0002026051515713334, -0.030437719076871872, -0.01982058770954609, -0.017619898542761803, -0.04013601690530777, 0.03464880958199501, -0.04437020793557167, 0.010373799130320549, -0.057255037128925323, -0.006371108815073967, -0.02713695913553238, -0.06605585664510727, 0.01780680939555168, -0.00013575045159086585, 0.07283638417720795] + }, + "CommonOperands" : { + "Immediate":[-0.039664868265390396, 0.028720445930957794, -0.057207897305488586, 0.04179477319121361, 0.04477043077349663, 0.020050648599863052, -0.056656818836927414, -0.025030966848134995, -0.04394019395112991, 0.04849115386605263, 0.012325904332101345, 0.06731707602739334, 0.04568001255393028, -0.04773757979273796, -0.012142524123191833, -0.03986259177327156, -0.027249159291386604, -0.04930245876312256, -0.10542229562997818, -0.05678592994809151, -0.038303568959236145, -0.07283245027065277, 0.0217409897595644, -0.01139344647526741, 0.006936497986316681, -0.04702157527208328, 0.09977010637521744, -0.035237088799476624, 0.028822069987654686, -0.0691431537270546, -0.0829710066318512, -0.1289154589176178, -0.08470306545495987, -0.06731563061475754, 0.06642980873584747, 0.026025734841823578, -0.04049745202064514, 0.030080674216151237, 0.04203929752111435, 0.06834205985069275, 0.04315062239766121, 0.00788890291005373, 0.03426999971270561, 0.08819636702537537, 0.004112098831683397, 0.03392210975289345, 0.010541473515331745, 0.08045777678489685, -0.02914009988307953, 0.0624285452067852, 0.03299122676253319, -0.05355033650994301, -0.07568570226430893, 0.08106201142072678, 0.0376802459359169, -0.04886564612388611, -0.10992937535047531, -0.00761816743761301, -0.014918084256350994, 0.03816765174269676, -0.04981819912791252, 0.00031993765151128173, 0.011382698081433773, 
-0.029902901500463486, -0.0117422454059124, -0.057965945452451706, -0.09519924223423004, 0.020727403461933136, -0.04526710882782936, 0.09883677959442139, 0.018033087253570557, -0.003035350237041712, -0.06968960911035538, -0.09893210977315903, -0.01264366414397955, 0.017397744581103325, -0.08519260585308075, 0.09382850676774979, -0.055508699268102646, -0.026548130437731743, -0.013868317008018494, -0.03162496164441109, 0.06089535728096962, -0.01583624631166458, -0.060260944068431854, 0.06709896773099899, -0.09333796799182892, -0.02887417934834957, -0.03424007445573807, -0.01687423326075077, 0.11968979239463806, -0.08361987769603729, 0.09037765115499496, -0.04322688281536102, -0.040831610560417175, -0.061376459896564484, -0.03485504537820816, 0.016033072024583817, 0.004106835462152958, -0.03354674205183983], + "MBB":[0.0285621527582407, 0.017540860921144485, -0.08473232388496399, -0.004012782592326403, 0.01284435298293829, -0.05268647149205208, 0.05576688051223755, 0.0021535248961299658, -0.03945871442556381, -0.006189210340380669, -0.015129411593079567, -0.08998296409845352, -0.023543253540992737, -0.03973307088017464, 0.03474939242005348, -0.01602775789797306, -0.07461361587047577, -0.016514597460627556, -0.016366377472877502, 0.004728052299469709, -0.023341577500104904, -0.0914730429649353, 0.030636735260486603, -0.03425632417201996, 0.03614623472094536, -0.007019295822829008, -0.0218521635979414, -0.015808485448360443, -0.05414801836013794, 0.029721688479185104, 0.09407073259353638, 0.029655681923031807, -0.005722714588046074, 0.08653672784566879, 0.01633341796696186, -0.07890991121530533, -0.07574641704559326, 0.013483843766152859, -0.0011275253491476178, -0.05623066797852516, -0.03096684440970421, -0.0019136210903525352, 0.005127475131303072, 0.005057196598500013, -0.008401975966989994, -0.0391613207757473, -0.0026145142037421465, 0.05342942103743553, 0.034099776297807693, 0.028928104788064957, -0.006105952430516481, -0.039190810173749924, 0.026784662157297134, 
-0.07679374516010284, -0.007475676946341991, -0.036650288850069046, 0.00774755235761404, 0.008984091691672802, -0.059830714017152786, 0.042310964316129684, 0.0681624785065651, -0.018189340829849243, -0.014816401526331902, -0.05541539564728737, -0.09348370134830475, 0.003691869555041194, -0.0010735570685938, -0.010131723247468472, -0.041050590574741364, -0.013792471028864384, -0.024337435141205788, 0.07526508718729019, 0.08163300901651382, -0.03508464992046356, -0.01681988686323166, -0.06734774261713028, -0.07656992971897125, -0.03866373747587204, 0.004544078838080168, 0.0585801787674427, -0.021823249757289886, -0.0610244981944561, -0.04469957575201988, -0.011089849285781384, -0.05069964751601219, -0.025694409385323524, -0.0670132040977478, 0.09616350382566452, 0.06308142840862274, -0.10543308407068253, 0.0023751568514853716, -0.06237253174185753, 0.05771911144256592, -0.06010056659579277, -0.016188565641641617, 0.009142348542809486, -0.014255198650062084, -0.02999819628894329, 0.00473234336823225, 0.03976761922240257], + "FrameIndex":[0.05219179764389992, -0.01926516741514206, -0.021848104894161224, -0.008528115227818489, 0.02989117242395878, -0.012461756356060505, -0.050973404198884964, 0.026713935658335686, 0.01968700997531414, -0.001058116089552641, 0.009182002395391464, 0.03877940773963928, 0.070717453956604, -0.0028735792730003595, 0.0528000183403492, -0.015265910886228085, 0.007753959856927395, 0.01596899703145027, -0.07933179289102554, -0.02578687109053135, 0.02417992427945137, -0.03462255373597145, 0.04385964199900627, 0.004388607107102871, 0.03716951236128807, 0.04064105078577995, 0.07711678743362427, 0.0068300217390060425, -0.05443308874964714, -0.010809220373630524, -0.03124961629509926, 0.004911563824862242, -0.09201066941022873, 0.051436200737953186, 0.015400445088744164, 0.07804328948259354, -0.02971532940864563, -0.0003241244703531265, -0.02131350338459015, -0.09173687547445297, -0.01707594096660614, 0.0025449323002249002, 0.08701702952384949, 
0.10675988346338272, -0.05082142353057861, 0.021581847220659256, -0.04104776680469513, 0.08402986079454422, -0.06109907105565071, 0.015201682224869728, 0.04374992102384567, -0.028573378920555115, -0.07767742872238159, 0.07216905802488327, 0.020538095384836197, -0.01229778677225113, 0.003033912740647793, -0.0007747758063487709, -0.09185474365949631, -0.02851664461195469, -0.009441743604838848, 0.05500328913331032, -0.002983751241117716, -0.09198789298534393, -0.051319632679224014, -0.054626885801553726, -0.020108554512262344, 0.0010591084137558937, -0.009138713590800762, 0.07223176956176758, -0.022099260240793228, 0.016025206074118614, -0.05320229008793831, 0.025131219998002052, 0.06626036763191223, 0.07639450579881668, -0.027084894478321075, 0.06581225991249084, -0.017618829384446144, -0.03859466314315796, -0.03385398909449577, 0.018783841282129288, -0.0730312392115593, 0.06957981735467911, -0.03065340407192707, 0.020685074850916862, -0.05311165004968643, 0.09466810524463654, 0.00955914705991745, -0.013919183053076267, -0.05540250986814499, -0.03087283857166767, -0.009688221849501133, 0.016239993274211884, -0.012926830910146236, -0.027712060138583183, -0.06342892348766327, -0.011996395885944366, 0.05536693334579468, -0.04359230771660805], + "ConstantPoolIndex":[0.041396364569664, -0.032536957412958145, -0.01450332161039114, -0.006678386591374874, 0.058945223689079285, 0.02544882893562317, -0.03047209233045578, -0.07739393413066864, -0.09328317642211914, -0.01668739691376686, -0.024649402126669884, -0.0379607230424881, -0.11910244077444077, -0.020992999896407127, -0.007654233835637569, -0.005232746247202158, -0.05641235038638115, -0.030478237196803093, -0.11095637828111649, -0.029757868498563766, 0.007831704802811146, -0.06478779017925262, -0.029330771416425705, -0.016729608178138733, 0.016851121559739113, -0.08636923134326935, 0.09819734841585159, -0.06862954050302505, -0.054081980139017105, -0.11573795974254608, 0.025045182555913925, -0.045820001512765884, 
-0.03937136381864548, -0.0006095073185861111, 0.010480350814759731, 0.04263518005609512, -0.07309181243181229, 0.030367357656359673, 0.05174611508846283, -0.07616177201271057, 0.08458246290683746, -0.05704038590192795, -0.08539492636919022, -0.027642514556646347, -0.01617196388542652, 0.025178344920277596, 0.009598441421985626, -0.02391812391579151, -0.007018273696303368, 0.08220435678958893, 0.019317878410220146, -0.07800780981779099, 0.008812256157398224, -0.08796992152929306, -0.018406951799988747, 0.06285018473863602, 0.0247958917170763, -0.010797450318932533, 0.042904313653707504, 0.04307369515299797, 0.03591239079833031, 0.0318138487637043, -0.052741825580596924, -0.05960077419877052, 0.05289359390735626, -0.07335714250802994, -0.07966916263103485, 0.06509458273649216, -0.014078558422625065, 0.05966315418481827, -0.10191051661968231, 0.038503143936395645, 0.08414285629987717, -0.09167703986167908, -0.03125883638858795, 0.00029595239902846515, -0.05052953213453293, 0.06109768897294998, 0.027757229283452034, 0.07064288854598999, 0.025423981249332428, 0.04430470988154411, 0.006646708585321903, 0.011614424176514149, -0.058028463274240494, -0.026873555034399033, -0.045714568346738815, -0.009242760017514229, -0.08255617320537567, 0.03060135245323181, -0.019932182505726814, -0.07189206779003143, 0.01935136877000332, 0.05297813192009926, 0.004497232846915722, -0.08383949100971222, -0.0008196682319976389, 0.03524069860577583, 0.023135961964726448, 0.00863903108984232], + "JumpTableIndex":[-0.007416237145662308, 0.0038157713133841753, 0.05180662125349045, 0.03776901960372925, -0.011749244295060635, -0.02952706068754196, -0.06646136939525604, 0.02088487148284912, -0.001927916775457561, 0.018895410001277924, 0.0509350448846817, 0.057210080325603485, -0.0476078987121582, -0.00016809302906040102, -0.02341553010046482, -0.06734820455312729, 0.02047930844128132, 0.009282611310482025, 0.0038133300840854645, 0.0020261742174625397, -0.09253961592912674, 0.0766557827591896, 
-0.049570225179195404, -0.11510220915079117, -0.009570423513650894, -0.007274465169757605, 0.07750000059604645, 0.02489926479756832, -0.08297400176525116, 0.048176445066928864, 0.03797437995672226, 0.060842450708150864, 0.020265065133571625, -0.03559373319149017, 0.03493893891572952, -0.0036544676404446363, 0.010211148299276829, -0.06471849977970123, -0.034595828503370285, -0.05245388671755791, -0.0014119939878582954, 0.008752748370170593, -0.020637203007936478, 0.053244929760694504, 0.052053239196538925, 0.014706660993397236, 0.02803724631667137, -0.07983336597681046, 0.03106858767569065, 0.001688914722763002, -0.07647732645273209, -0.028148295357823372, -0.0528123639523983, 0.08006428182125092, -0.06398879736661911, -0.033476538956165314, 0.05217607319355011, -0.03093232959508896, 0.044230975210666656, 0.05123162269592285, -0.05225585401058197, 0.06976816058158875, -0.0014492797199636698, 0.03833283483982086, 0.08385992050170898, -0.04722217097878456, -0.00226160092279315, -0.027254855260252953, -0.09566919505596161, 0.02109321765601635, -0.032354824244976044, 0.08032239973545074, -0.046937450766563416, -0.004326784983277321, -0.026024870574474335, 0.12039119750261307, 0.1016048863530159, 0.06808122247457504, -0.012297546491026878, -0.06450799852609634, 0.015778351575136185, 0.012280710972845554, 0.04002666845917702, 0.04792468994855881, -0.06248988211154938, -0.054222140461206436, 0.018379682675004005, -0.0029111658222973347, 0.016062958166003227, 0.09880068898200989, 0.03846307471394539, 0.04975416138768196, 0.07305088639259338, -0.020941948518157005, -0.020897891372442245, 0.03872328996658325, -0.05682756006717682, 0.09583723545074463, 0.0028475294820964336, -0.05127262324094772], + "ExternalSymbol":[0.014755810610949993, -0.049842361360788345, -0.06733497977256775, 0.05401315540075302, 0.061938412487506866, 0.02437831088900566, -0.06823863834142685, 0.03685877099633217, 0.02961423434317112, -0.04944299906492233, -0.1271103173494339, 0.030452819541096687, 
0.019848955795168877, -0.03185190260410309, 0.06586895883083344, 0.0007315169204957783, 0.010839227586984634, -0.09547370672225952, -0.01799146644771099, -0.02204788289964199, 0.048699937760829926, 0.004187166225165129, 0.004053634125739336, -0.04464051127433777, -0.005158414598554373, -0.0416896678507328, -0.024279240518808365, -0.05358913540840149, -0.04719633609056473, -0.07180647552013397, 0.02559211477637291, 0.04657098650932312, 0.08353757858276367, -0.0023563469294458628, 0.046847302466630936, -0.03508693352341652, 0.0696689784526825, 0.054716791957616806, -0.012037037871778011, 0.019885245710611343, 0.01824580691754818, -0.06719563156366348, -0.05447190999984741, 0.08877509087324142, -0.01375679112970829, -0.014463561587035656, -0.049798283725976944, 0.06304343044757843, -0.007584648672491312, -0.016156170517206192, 0.024602508172392845, 0.004940119571983814, -0.04088609293103218, 0.0026271860115230083, 0.00787595845758915, -0.01889132149517536, -0.041029710322618484, 0.07343143969774246, -0.02505693957209587, -0.04825644940137863, 0.060728199779987335, 0.00460366066545248, 0.020744791254401207, 0.04238201677799225, -0.024090539664030075, -0.05792662873864174, 0.07639332860708237, -0.07511764764785767, -0.08259762078523636, 0.07901840656995773, -0.000285966758383438, 0.021390466019511223, -0.07818973809480667, -0.02385067008435726, -0.0014113716315478086, -0.055170729756355286, 0.00946732610464096, 0.02471417747437954, 0.07941421121358871, 0.006746167317032814, -0.06766024231910706, -0.089698426425457, 0.01933225803077221, -0.06994582712650299, -0.10149082541465759, 0.06007266044616699, -0.14545120298862457, -0.03447172790765762, 0.03258124738931656, 0.04966919496655464, 0.023691890761256218, -0.014501980505883694, 0.05896589905023575, 0.04760534316301346, -0.017742110416293144, 0.0019451226107776165, -0.01854461058974266, -0.04744676500558853, -0.017504630610346794, 0.05197983980178833], + "GlobalAddress":[0.021709734573960304, -0.03253590315580368, 
-0.04603651538491249, -0.02350226789712906, 0.02841794677078724, 0.01920732669532299, 0.053104616701602936, 0.03941836208105087, -0.01895466446876526, -0.030471740290522575, 0.010719750076532364, 0.020050356164574623, 0.03648754581809044, -0.021573888137936592, -0.02554452419281006, -3.637039117165841e-05, 0.05989491194486618, -0.006903402041643858, -0.08826262503862381, -0.028047384694218636, -0.04230065643787384, -0.05190899223089218, 0.06145390123128891, 0.0005839569494128227, -4.391977927298285e-05, -0.01880771853029728, 0.09660127758979797, 0.04333353415131569, 0.06461602449417114, -0.06010710820555687, -0.0690189078450203, 0.04574553668498993, -0.07640431076288223, 0.01879746839404106, 0.02076675370335579, 0.04869573190808296, 0.025147439911961555, 0.05311164632439613, 0.05711919441819191, 0.049520380795001984, 0.041169121861457825, -0.0603964701294899, -0.04195070639252663, 0.07676130533218384, -0.015161959454417229, 0.02903268299996853, -0.027548301964998245, 0.04705912992358208, -0.11194053292274475, -0.008245207369327545, -0.07792827486991882, -0.019468743354082108, 0.05482499673962593, -0.0028855702839791775, 0.05478052794933319, 0.07484771311283112, -0.011742575094103813, 0.00923923309892416, -0.05074375122785568, 0.06956734508275986, -0.045990440994501114, 0.007280972320586443, 0.040920473635196686, -0.09143709391355515, -0.06105270981788635, -0.0021254979074001312, -0.09519167989492416, 0.06324268877506256, -0.0693386048078537, -0.05100148543715477, 0.010643817484378815, -0.008162467740476131, -0.08811189234256744, -0.08640385419130325, 0.0077143507078289986, 0.030832089483737946, -0.01504515577107668, 0.07277517020702362, 0.02581198327243328, -0.052599068731069565, -0.06478387117385864, 0.01634707674384117, -0.021173706278204918, 0.030482977628707886, -0.09826494008302689, 0.07716016471385956, -0.10845024883747101, 0.04479274898767471, -0.015128640457987785, -0.03491876646876335, 0.05239150673151016, -0.03427724912762642, 0.06768845021724701, 
-0.04174086079001427, -0.05136744678020477, 0.0037109211552888155, -0.030324269086122513, -0.06928850710391998, -0.0395960658788681, 0.07726000994443893], + "RegisterMask":[0.009287647902965546, 0.029691029340028763, -0.03465871885418892, 0.032606374472379684, -0.007339544594287872, 0.03367740660905838, -0.0661492720246315, 0.0436118021607399, -0.002896533813327551, 0.028440887108445168, -0.06791415065526962, 0.004055356606841087, -0.01596181094646454, -0.003846745239570737, 0.06762582808732986, -0.025632556527853012, 0.08132420480251312, 0.025554664433002472, -0.08994632959365845, 0.02521730400621891, 0.023826507851481438, 0.0004487193073146045, 0.01047397032380104, 0.03246957063674927, -0.033482909202575684, 0.05051224306225777, 0.005778896156698465, -0.0006257061613723636, 0.00522293895483017, -0.04666636884212494, 0.022335125133395195, -0.022150320932269096, 0.04510439187288284, -0.02769547514617443, 0.026804683730006218, 0.0710473507642746, -0.014513042755424976, 0.0695318952202797, 0.048469461500644684, -0.008654370903968811, -0.028613079339265823, -0.02918054349720478, -0.022721733897924423, -0.0004791628452949226, 0.011470172554254532, 0.08561886101961136, 0.07125027477741241, -0.05847848951816559, 0.011811288073658943, -0.025244031101465225, -0.03665035218000412, -0.03482883796095848, 0.04196881502866745, 0.06909161061048508, 0.02365143597126007, -0.0689089447259903, -0.0707414448261261, -0.03962424397468567, -0.025703679770231247, 0.06502455472946167, 0.057676125317811966, 0.026916807517409325, 0.024921152740716934, 0.009799988009035587, -0.018656229600310326, 0.009880480356514454, -0.06516153365373611, 0.019290866330266, 0.02236226759850979, -0.02598695270717144, -0.00299705658107996, 0.019448822364211082, -0.014883329160511494, 0.06645222008228302, -0.028751512989401817, -0.01589173451066017, 0.026225939393043518, 0.07285763323307037, -0.06037987396121025, -0.027615630999207497, -0.039930179715156555, -0.07122864574193954, 0.029825787991285324, 
0.026364129036664963, -0.04438399150967598, 0.07015394419431686, -0.013950555585324764, 0.004367176443338394, 0.020521124824881554, 0.02030497044324875, 0.011951270513236523, 0.06765977293252945, -0.015042259357869625, 0.005189584568142891, -0.07532864063978195, -0.010886142030358315, 0.006792030762881041, -0.06348442286252975, 0.031859394162893295, -0.052482619881629944], + "Metadata":[-0.07879140228033066, 0.024690961465239525, 0.022790303453803062, 0.01354144886136055, -0.07098772376775742, 0.04053819552063942, -0.04038544371724129, -0.021055836230516434, 0.10361373424530029, 0.04415135458111763, -0.09545262902975082, 0.042553599923849106, -0.021835647523403168, 0.07703430950641632, -0.04880501329898834, -0.04054124280810356, 0.05049756169319153, 0.08986796438694, 0.0705084353685379, -0.0077315340749919415, -0.045390889048576355, 0.053155045956373215, 0.045656319707632065, -0.02663712576031685, -0.01446426473557949, -0.058978915214538574, 0.011314704082906246, 0.03043927252292633, -0.0843580812215805, 0.017854437232017517, -0.08720997720956802, 0.030351335182785988, -0.04896129295229912, 0.04189978539943695, -0.09887325763702393, 0.0015409664483740926, -0.08604399859905243, 0.10654544085264206, 0.1058540865778923, 0.014106648042798042, 0.0640459656715393, -0.05182884633541107, 0.006081609521061182, 0.07624028623104095, 0.02025698497891426, 0.08467324078083038, 0.027136018499732018, 0.026320911943912506, -0.035337720066308975, 0.03864980861544609, -0.019960917532444, -0.029152821749448776, 0.06562864780426025, 0.028298277407884598, -0.07397148013114929, -0.005078969523310661, 0.025909438729286194, -0.01157586183398962, 0.05436081811785698, 0.03408071771264076, -0.07142144441604614, -0.0523630827665329, -0.06302442401647568, -0.019975490868091583, -0.06937523931264877, 0.057667043060064316, -0.08580337464809418, -0.05092239752411842, -0.012613813392817974, 0.025480754673480988, 0.04219530522823334, -0.007300581783056259, 0.05323299020528793, 0.0489904023706913, 
0.09260626882314682, -0.04819458723068237, 0.05419271066784859, 0.04558999091386795, 0.012036344967782497, -0.05483977124094963, -0.05181310698390007, -0.02104383148252964, -0.057876624166965485, 0.039601441472768784, 0.025240536779165268, -0.03984035924077034, 0.07654847204685211, -0.07073183357715607, -0.0018080074805766344, -0.016453349962830544, 0.03962434455752373, 0.05717255175113678, 0.01962372660636902, 0.00952839944511652, 0.0013127806596457958, 0.013634574599564075, 0.07692103832960129, 0.06334574520587921, 0.056647684425115585, -0.02965259924530983], + "MCSymbol":[0.05158298835158348, 0.05024643987417221, 0.06704410910606384, 0.0378347709774971, -0.03902719169855118, -0.08626251667737961, 0.03964311257004738, 0.06615762412548065, 0.04361319541931152, 0.03646374121308327, -0.018487416207790375, 0.0024993624538183212, 0.006693041883409023, 0.08311881870031357, 0.021111667156219482, 0.038208797574043274, 0.08689694851636887, -0.03659898787736893, 0.020775076001882553, 0.03553535416722298, 0.06854367256164551, -0.002012243028730154, 0.03658154606819153, 0.03127564862370491, 0.0363621786236763, -0.027205800637602806, -0.05243372917175293, 0.012564878910779953, -0.013430594466626644, -0.04043225944042206, -0.025083716958761215, 0.09665156900882721, 0.005077417939901352, -0.05181048810482025, 0.08925056457519531, 0.0777667909860611, -0.013708796352148056, 0.07754126191139221, 0.08393577486276627, 0.06395212560892105, -0.07428556680679321, -0.052424050867557526, 0.03497577831149101, 0.01964585855603218, -0.0429445318877697, 0.07072066515684128, 0.0017074055504053831, 0.059513408690690994, 0.013262910768389702, -0.07240563631057739, 0.09288764744997025, 0.030620144680142403, -0.046197980642318726, 0.04847298562526703, -0.03942957893013954, -0.0025783153250813484, -0.019526517018675804, 0.038867682218551636, 0.006007499527186155, -0.06366054713726044, 0.004640159662812948, 0.013837787322700024, -0.020015377551317215, -0.010317903012037277, 0.001741019543260336, 
0.06261103600263596, -0.03374830260872841, 0.01629183441400528, -0.013137640431523323, 0.026046304032206535, -0.009679407812654972, -0.07085473090410233, 0.03035539574921131, -0.08764562010765076, -0.03820766881108284, -0.04181021824479103, -0.05163294076919556, 0.06666433811187744, -0.08939782530069351, 0.040260378271341324, -0.06847432255744934, 0.09106951206922531, -0.07388591021299362, -0.07479099184274673, -0.001779694459401071, -0.0963745042681694, -0.06515862792730331, -0.08404017239809036, -0.09935544431209564, 0.010541093535721302, -0.04491754248738289, 0.09378639608621597, 0.006655062548816204, 0.06637217849493027, -0.05623293295502663, -0.020134123042225838, 0.005873391404747963, -0.07765494287014008, -0.0008442706312052906, -0.03568055108189583] + }, + "VirtualRegisters" : { + "VIRT_REG_FR32":[0.0034248235169798136, -0.011980761773884296, -0.0501178540289402, 0.0494888611137867, 0.06103336811065674, -0.06178610771894455, 0.007709897588938475, -0.011392943561077118, 0.06570645421743393, 0.0771368145942688, 0.0005577280535362661, 0.013396150432527065, -0.041660163551568985, 0.05122360959649086, 0.11354377865791321, -0.009875510819256306, -0.06466709822416306, 0.048170577734708786, 0.0007201629341579974, 0.06538223475217819, 0.08870227634906769, -0.05771782249212265, 0.009273379109799862, -0.03325295075774193, 0.01197165809571743, 0.06604835391044617, 0.08265330642461777, -0.005758166313171387, 0.02512396313250065, 0.03383670747280121, 0.038484204560518265, -0.06539343297481537, -0.013461028225719929, 0.001498897559940815, 0.05170154944062233, 0.06965786963701248, -0.07339458167552948, 0.05094756931066513, 0.01983451284468174, -0.06855696439743042, 0.07892709225416183, 0.06099703162908554, 0.08492864668369293, 0.05357863008975983, -0.009294840507209301, -0.0054923719726502895, -0.029938997700810432, 0.028260599821805954, 0.053790509700775146, -0.06574371457099915, -0.009621666744351387, -0.08131514489650726, -0.08474338054656982, 0.039622966200113297, 
0.06945627927780151, 0.02545306645333767, 0.005390701815485954, 0.04582791030406952, -0.1103447750210762, -0.050917647778987885, 0.03087870217859745, 0.06918162852525711, 0.0548822283744812, -0.01838473603129387, 0.05597897991538048, 0.03548860549926758, -0.009931124746799469, -0.07856663316488266, 0.033994875848293304, 0.03467561677098274, 0.09580692648887634, -0.04153195023536682, -0.06732118874788284, -0.06857144832611084, 0.03419093042612076, -0.01200241968035698, -0.06983492523431778, 0.05929506942629814, -0.00041734304977580905, -0.026396293193101883, 0.05230500176548958, -0.006162640172988176, 0.044198282063007355, -0.028765834867954254, 0.031155114993453026, 0.06967037916183472, -0.0892564132809639, 0.028816571459174156, -0.037065472453832626, 0.06540130823850632, -0.01888667233288288, 0.030632384121418, 0.0359313078224659, 0.106044240295887, 0.03259910270571709, -0.0775517001748085, -0.04267778620123863, 0.04977935180068016, -0.01790289767086506, -0.11223265528678894], + "VIRT_REG_FR64":[0.08496882021427155, 0.049308884888887405, -0.016840212047100067, 0.010602951049804688, -4.6025739720789716e-05, -0.06524767726659775, 0.048670798540115356, -0.06444543600082397, -0.0031944462098181248, 0.05608433857560158, -0.03958145156502724, 0.05171080678701401, -0.03572545200586319, -0.054364755749702454, 0.052311528474092484, -0.0361458919942379, 0.024109655991196632, 0.15923210978507996, -0.07255382835865021, -0.011799084022641182, -0.06846465915441513, 0.0023571476340293884, 0.02642918936908245, -0.05057685822248459, 0.029800178483128548, -0.06036723777651787, -0.012272411957383156, -0.022802220657467842, -0.02426644042134285, 0.05623406544327736, -0.07506053894758224, -0.02078152634203434, 0.02549685165286064, -0.030025657266378403, -0.0627482682466507, 0.062375299632549286, 0.03684084117412567, 0.06365678459405899, 0.0004415051080286503, -0.002180535811930895, 0.05225013941526413, -0.0693102702498436, -0.03649357333779335, 0.005159272346645594, 
-0.03298519179224968, 0.041419681161642075, -0.05325934663414955, -0.017585784196853638, -0.03843431547284126, -0.002649943344295025, 0.033329058438539505, -0.04736043140292168, -0.043852102011442184, -0.06713785231113434, -0.03237355872988701, 0.012679073959589005, -0.01959240809082985, 0.07324203103780746, 0.07468831539154053, 0.03327644243836403, -0.01596391387283802, 0.12015434354543686, 0.051839299499988556, 0.00980563648045063, -0.08275608718395233, 0.04445798322558403, -0.03891860321164131, 0.10891054570674896, -0.008730625733733177, -0.051655255258083344, -0.05982912331819534, 0.04106972739100456, 0.06872759014368057, 0.013289053924381733, 0.03469584137201309, -0.06673429906368256, -0.0695682018995285, 0.047426726669073105, 0.02815094031393528, -0.05552271753549576, 0.0010567272547632456, -0.051840681582689285, -0.01704293303191662, -0.047185055911540985, 0.036965738981962204, 0.03452568128705025, -0.05430837720632553, 0.0383443646132946, 0.0003438846324570477, -0.030417989939451218, 0.02749026007950306, -0.0546082966029644, 0.03005768544971943, 0.0025131346192210913, 0.0013019279576838017, -0.054173994809389114, -0.008382225409150124, 0.02153395675122738, 0.011912085115909576, -0.10461334884166718], + "VIRT_REG_GR16":[0.09543223679065704, 0.03513967618346214, 0.08986528217792511, -0.012217407114803791, -0.02076001651585102, -0.04190119728446007, 0.01318269595503807, -0.010142332874238491, -0.011869532987475395, -0.040446147322654724, 0.06552371382713318, 0.04439055174589157, 0.08176156878471375, -0.06334159523248672, -0.033928077667951584, -0.00024628525716252625, 0.0244551170617342, -0.019419007003307343, -0.09592454880475998, 0.005961012560874224, 0.03278326243162155, -0.07028506696224213, -0.08484592288732529, -6.329250754788518e-05, 0.015018146485090256, -0.05068608745932579, 0.0732998326420784, 0.023434389382600784, 0.0002124009479302913, 0.060401707887649536, 0.013626078143715858, -0.010556582361459732, -0.005069760140031576, -0.004616749472916126, 
-0.034329116344451904, 0.060584329068660736, -0.05430089309811592, -0.029179023578763008, 0.042385730892419815, -0.0652197003364563, 0.09378205984830856, -0.05090794339776039, -0.008510591462254524, 0.0837036669254303, 0.009071480482816696, 0.04464874789118767, -0.012855015695095062, 0.06306030601263046, -0.08556588739156723, -0.05393703281879425, -0.06741822510957718, -0.03717748448252678, 0.017156923189759254, 0.07401604950428009, -0.06629005819559097, -0.04564857482910156, -0.055414989590644836, 0.039407771080732346, -0.04089723527431488, 0.06915309280157089, 0.030190052464604378, 0.027542876079678535, 0.03557966649532318, 0.05191207677125931, -0.03237364813685417, -0.02036256715655327, -0.071859210729599, -0.06704329699277878, 0.0336633175611496, 0.09511569887399673, 0.0048662531189620495, 0.05273270234465599, -0.056247059255838394, 0.06079721450805664, -0.04150049015879631, -0.08104457706212997, -0.10303051024675369, 0.04522428661584854, -0.04379847273230553, -0.019447194412350655, 0.0021319733932614326, -0.010465282015502453, 0.06857019662857056, -0.00443653529509902, -0.08039603382349014, -0.05012141168117523, 0.0875077098608017, -0.03053239732980728, -0.05321606993675232, 0.016501901671290398, -0.0563507042825222, -0.03187479078769684, -0.0015389680629596114, 0.022985411807894707, -0.05008963868021965, 0.028300117701292038, 0.02875804342329502, -0.024458128958940506, -0.022238614037632942, -0.049835607409477234], + "VIRT_REG_GR32":[-0.008479167707264423, -0.02941126376390457, 0.05343153327703476, 0.03769504278898239, -0.0006716987118124962, -0.0329299233853817, 0.03442851081490517, -0.06826753169298172, -0.09117511659860611, -0.018657755106687546, 0.029032904654741287, 0.02404048666357994, 0.010598761960864067, -0.0482308566570282, 0.06956348568201065, -0.027967501431703568, -0.07380961626768112, -0.021098148077726364, -0.0808446854352951, 0.0127912862226367, -0.01355082169175148, -0.040285225957632065, 0.035385165363550186, -0.001157263875938952, 
-0.026462145149707794, -0.08616211265325546, -0.044482193887233734, -0.010969695635139942, 0.04645564407110214, -0.018178211525082588, -0.038536932319402695, -0.027571648359298706, -0.007523007690906525, -0.02699458785355091, -0.039170436561107635, 0.12889482080936432, -0.04512789845466614, -0.03883056715130806, 0.051210880279541016, 0.03924906626343727, 0.036943964660167694, -0.016879307106137276, 0.011263007298111916, 0.053573690354824066, -0.018964825198054314, -0.041856080293655396, -0.036545924842357635, 0.07715532928705215, -0.041981130838394165, -0.04114629328250885, -0.04393022507429123, -0.030163627117872238, 0.0019487979589030147, 0.10988762229681015, 0.09039165079593658, -0.0035424421075731516, -0.06272851675748825, 0.007701062131673098, -0.01971622183918953, 0.06203003600239754, 0.048561323434114456, -0.04599940404295921, 0.00802221056073904, -0.002905400237068534, -0.1050020381808281, 0.003395768813788891, -0.07973644882440567, 0.008020970039069653, -0.08614815771579742, 0.0518532320857048, 0.021174483001232147, 0.03254232555627823, -0.01905026100575924, -0.0009989180834963918, -0.06409642845392227, -0.022425753995776176, -0.03563409671187401, 0.07717793434858322, -0.04553033784031868, -0.02112392708659172, -0.002374667674303055, 0.03828585892915726, -0.014221777208149433, -0.015974245965480804, -0.01805220916867256, 0.04202109947800636, -0.0841534212231636, 0.06608130037784576, -0.11586519330739975, 0.024179989472031593, 0.017091574147343636, 0.08567194640636444, -0.03692129999399185, 0.03266705200076103, -0.046154942363500595, 0.0040525165386497974, -0.03177625685930252, 0.039895471185445786, 0.042960215359926224, -0.05573953315615654], + "VIRT_REG_GR32_ABCD":[0.016604775562882423, -0.0028934956062585115, 0.041060179471969604, -0.025077441707253456, -0.018642406910657883, 0.023762650787830353, -0.028646549209952354, -0.02460283786058426, 0.005985732190310955, 0.01774146780371666, -0.004014404024928808, -0.05473850294947624, -0.0417158380150795, 
-0.06322457641363144, 0.060795728117227554, -0.036435071378946304, -0.04245952516794205, 0.08069344609975815, 0.035319335758686066, -0.012020719237625599, 0.045771341770887375, -0.10842540860176086, 0.046253710985183716, -0.004099135287106037, 0.030616935342550278, -0.08288344740867615, 0.08569363504648209, -0.014164377935230732, -0.004303323570638895, 0.09726760536432266, 0.06208871304988861, -0.04007713496685028, 0.005815347656607628, 0.02377200312912464, 0.07813961058855057, 0.03192306309938431, -0.006230524741113186, 0.10110925883054733, -0.023409254848957062, 0.030774405226111412, -0.011607645079493523, -0.03929119184613228, 0.004817614797502756, -0.013827506452798843, 0.07770339399576187, -0.07994075864553452, -0.03157062083482742, 0.06743781268596649, 0.014881699346005917, -0.030165214091539383, -0.07844353467226028, -0.04563238099217415, 0.09747181832790375, 0.057128582149744034, 0.04173563793301582, -0.0011194447288289666, -0.01902887038886547, -0.032171595841646194, 0.04824799671769142, 0.008433254435658455, 0.024706291034817696, 0.0746094286441803, 0.04515853151679039, -0.0018984260968863964, -0.10070884972810745, -0.01883143000304699, -0.07785795629024506, 0.10938235372304916, -0.08001448959112167, -0.07419873028993607, 0.010544849559664726, 0.025767439976334572, -0.1005895584821701, 0.05103800818324089, -0.03675306960940361, -0.020510872825980186, 0.022482097148895264, 0.06463642418384552, -0.03149804100394249, -0.021647030487656593, 0.04025804623961449, 0.003628256032243371, 0.03532547131180763, -0.08667688816785812, 0.018817460164427757, -0.01690257526934147, -0.10114696621894836, -0.022815177217125893, 0.024386661127209663, 0.10286301374435425, 0.030005114153027534, 0.0370776504278183, -0.008584428578615189, -0.077603779733181, -0.03588058054447174, 0.030617419630289078, -0.07383710891008377, 0.03215676173567772, 0.03288266062736511, -0.036702848970890045], + "VIRT_REG_GR32_NOREX":[0.019052108749747276, -0.006784944795072079, -0.05410394072532654, 
0.001966317882761359, -0.06686867773532867, 0.013514372520148754, 0.030097918584942818, -0.03868359327316284, 0.004314934369176626, -0.06713679432868958, 0.02491898462176323, 0.027683967724442482, 0.035907283425331116, -0.023093875497579575, -0.0892200842499733, -0.1052003800868988, -0.03923499956727028, 0.08808581531047821, -0.10092058777809143, 0.03336786851286888, -0.08974049985408783, -0.015254802070558071, 0.039686985313892365, -0.010083628818392754, -0.03423550724983215, -0.08821681141853333, -0.05621311068534851, -0.020327769219875336, -0.016793876886367798, 0.08908043801784515, -0.04112761467695236, -0.050139520317316055, -0.01524045504629612, 0.05841142684221268, 0.08270087838172913, 0.0348736047744751, -0.016146546229720116, 0.05751227214932442, 0.05081859603524208, -0.07304663956165314, -0.047101784497499466, -0.02825125865638256, 0.0006340605323202908, 0.0008785317186266184, -0.044239338487386703, 0.007173972204327583, -0.029449066147208214, 0.07254412025213242, -0.026029080152511597, 0.025982191786170006, -0.09524690359830856, -0.052613094449043274, -0.1270490437746048, 0.05319184809923172, 0.1046818196773529, 0.0477570965886116, -0.06291303783655167, 0.04725426062941551, -0.05330964922904968, 0.04056742787361145, 0.01543382927775383, 0.03627128154039383, -0.048232536762952805, 0.014761016704142094, -0.007380587514489889, -0.008060632273554802, -0.021923277527093887, -0.022500980645418167, -0.08495079725980759, 0.045358967036008835, -0.04728720709681511, 0.03550735488533974, 0.03445536270737648, -0.01891610585153103, -0.09439470618963242, -0.044266197830438614, -0.07952893525362015, 0.05221104994416237, -0.03507477045059204, 0.04218391329050064, 0.040326621383428574, -0.0395088866353035, 0.02447870559990406, -0.04280063137412071, 0.06520935893058777, -0.003358252113685012, -0.057561881840229034, 0.01911463774740696, 0.05295571684837341, 0.030342884361743927, 0.03814920783042908, -0.03366788476705551, 0.03090745024383068, 0.09487249702215195, 
-0.002995486371219158, -0.012020634487271309, -0.029147809371352196, 0.09558248519897461, 0.02548893168568611, 0.0931544378399849], + "VIRT_REG_GR64":[0.02717440389096737, -0.026730243116617203, -0.023244258016347885, 0.04027782380580902, 0.006808254402130842, -0.027519788593053818, -0.01906559243798256, 0.027793627232313156, -0.00129543652292341, -0.03455121070146561, 0.021734628826379776, 0.035481199622154236, -0.07251942157745361, -0.025691546499729156, -0.03271827474236488, -0.13225725293159485, -0.0601421520113945, 0.09084498882293701, -0.10225717723369598, 0.004034099169075489, 0.023578351363539696, -0.041603971272706985, 0.04199974611401558, -0.014711204916238785, -0.04272732138633728, -0.12534455955028534, -0.023738788440823555, 0.005328727886080742, 0.038416482508182526, -0.026419155299663544, -0.041119154542684555, 0.00022502713545691222, -0.05204978585243225, -0.019709734246134758, -0.04102563485503197, 0.06480151414871216, 0.009224721230566502, 0.04627599939703941, 0.027821402996778488, -0.05595114827156067, 0.04526345059275627, 0.024196594953536987, 0.10446277260780334, 0.07561361789703369, -0.08028160035610199, -0.0314163975417614, 0.11944323033094406, 0.1025814488530159, -0.08457476645708084, 0.02227119728922844, -0.041679076850414276, -0.02260834351181984, 0.036674268543720245, 0.10488750785589218, 0.019218411296606064, -0.015966340899467468, -0.06852715462446213, 0.026523491367697716, -0.11090730130672455, -0.0021082640159875154, -0.048291631042957306, -0.032388005405664444, 0.015713853761553764, 0.03355225548148155, -0.06502845883369446, -0.010098783299326897, -0.09930021315813065, -0.017413528636097908, -0.055861033499240875, 0.0801810696721077, -0.03900628536939621, -0.03278445452451706, -0.0337282195687294, -0.11434067040681839, -0.04371264949440956, -0.01736009307205677, -0.05100121721625328, 0.07490750402212143, -0.014680330641567707, -0.02126181870698929, 0.018013890832662582, 0.0018135658465325832, 0.029781077057123184, 
-0.012477489188313484, -0.021443217992782593, 0.047576501965522766, -0.05993758141994476, -0.06040889024734497, 0.016642581671476364, 0.011624492704868317, -0.042229063808918, -0.007573941722512245, -0.04010608047246933, -0.006444427650421858, -0.014495199546217918, -0.04122597724199295, -0.08505907654762268, -0.004049300216138363, 0.06545045226812363, -0.04762336611747742], + "VIRT_REG_GR64_ABCD":[0.04577033221721649, -0.07758746296167374, 0.00799313560128212, -0.11011485010385513, -0.010862522758543491, 0.012709266506135464, 0.05257265642285347, -0.07354705780744553, 0.04262387007474899, 0.07554348558187485, -0.06358839571475983, 0.006669520866125822, 0.049098193645477295, 0.11183933168649673, -0.028112098574638367, 0.021986473351716995, -0.02839403599500656, -0.06199958547949791, 0.08614487200975418, -0.041216861456632614, 0.041238460689783096, 0.005937385838478804, 0.00200703926384449, -0.05337367579340935, 0.037919919937849045, -0.07485998421907425, -0.09153831005096436, -0.0554175041615963, -0.10251995176076889, -0.01289951242506504, -0.030631467700004578, 0.04197017475962639, -0.03578301519155502, 0.010593005456030369, -0.05836241692304611, 0.06809061765670776, 0.10871735960245132, -0.09833388775587082, -0.009873395785689354, -0.056898634880781174, 0.05946199968457222, 0.015534073114395142, 0.01677171140909195, -0.020233800634741783, -0.006396631710231304, -0.049332089722156525, 0.012649210169911385, 0.03756912052631378, 0.0033660116605460644, -0.09084216505289078, -0.07142844051122665, -0.0030346515122801065, 0.0019640070386230946, 0.038837920874357224, 0.011760945431888103, 0.04995080456137657, -0.06997165083885193, -0.035297296941280365, 0.01996617764234543, 0.01954355463385582, -0.0934600979089737, 0.030165065079927444, -0.007337240036576986, -0.05346155911684036, 0.0732186883687973, -0.04716489836573601, -0.06555212289094925, -0.018465254455804825, 0.051119767129421234, -0.03106619231402874, 0.0748852789402008, -0.02095886692404747, 
0.006320921704173088, 0.03146332502365112, -0.08238139003515244, -0.03618254140019417, -0.014570276252925396, 0.062481846660375595, -0.0394093319773674, -0.05171547457575798, -0.044726233929395676, -0.01228095218539238, 0.09699232876300812, 0.07471026480197906, 0.03112417459487915, 0.022543631494045258, -0.08634103089570999, 0.059702761471271515, -0.013801504857838154, 0.004984616301953793, 0.045798566192388535, -0.03205988556146622, -0.06150995194911957, -0.02244667150080204, 0.03318532556295395, 0.03462471440434456, 0.03236381709575653, 0.0884014293551445, -0.01604369841516018, -0.05234146490693092], + "VIRT_REG_GR64_NOREX":[-0.03959479182958603, -0.06190898269414902, -0.02920372597873211, -0.09973344951868057, -0.004333901684731245, -0.08522991091012955, 0.0459987074136734, -0.057674553245306015, 0.037046968936920166, -0.05669403821229935, -0.02221340872347355, -0.062426190823316574, 0.05804889276623726, -0.02635439857840538, -0.045627325773239136, 0.03632078319787979, 0.07128578424453735, 0.07544906437397003, -0.0537678524851799, -0.04624016210436821, 0.014316501095890999, 0.05580946058034897, 0.05251356214284897, -0.08244197070598602, -0.08901460468769073, -0.07641059905290604, -0.04924754425883293, 0.05417120084166527, -0.0060508353635668755, -0.00814742036163807, -0.06154030188918114, 0.05966867506504059, -0.03231468051671982, 0.021429890766739845, 0.031103987246751785, 0.04343251883983612, -0.08997714519500732, 0.039365898817777634, 0.052908625453710556, -0.02683917060494423, -0.05547752603888512, -0.014131218194961548, 0.0016863569617271423, -0.041112788021564484, -0.010230163112282753, -0.06687774509191513, -0.006144971586763859, -0.08074352145195007, 0.04034091532230377, -0.08176303654909134, -0.004055786412209272, -0.0024839320685714483, -0.007289807312190533, 0.06915127485990524, 0.023709064349532127, 0.04671626538038254, 0.06229325756430626, 0.04707597941160202, 0.06800796836614609, -0.02885584905743599, 0.030613983049988747, -0.019083039835095406, 
0.045457858592271805, 0.040770504623651505, -0.05441175401210785, -0.05712401866912842, 0.07744520157575607, -0.0756613239645958, -0.06890957802534103, -0.07997069507837296, 0.09348486363887787, -0.04511028528213501, 0.036194607615470886, 0.040017660707235336, 0.016245214268565178, 0.023104460909962654, 0.058383163064718246, 0.0679842159152031, -0.00921112485229969, -0.10036550462245941, 0.09075804799795151, -0.059704095125198364, -0.013338442891836166, -0.005139742512255907, 0.07807526737451553, 0.06255412846803665, -0.008151572197675705, -0.0624256506562233, 0.012590888887643814, 0.03665084019303322, -0.028498578816652298, -0.01614067517220974, 0.007552243769168854, -0.007216903381049633, 0.0760180801153183, -0.04200543463230133, 0.06412865966558456, -0.05136435106396675, -0.0024792966432869434, 0.06856651604175568], + "VIRT_REG_GR64_NOREX_NOSP":[-0.0656895712018013, 0.058077458292245865, -0.006653467658907175, 0.037784356623888016, 0.07274001836776733, 0.07232078164815903, 0.07074914127588272, 0.05637859180569649, 0.04296007752418518, 0.05499762296676636, -0.01783664897084236, -0.08387365937232971, -0.01376343984156847, -0.07938199490308762, -0.027822256088256836, -0.0663403570652008, 0.036170270293951035, -0.07460261881351471, 0.08652043342590332, 0.02483147382736206, -0.07939319312572479, 0.033202506601810455, 0.0903514102101326, -0.10181311517953873, 0.060751549899578094, 0.07619930803775787, 0.05017509311437607, -0.0470910519361496, 0.07713821530342102, -0.0426195003092289, -0.04506472498178482, 0.003363420255482197, -0.0017315347213298082, 0.06264199316501617, 0.005245774984359741, -0.027923958376049995, 0.09868567436933517, 0.06738796830177307, -0.10339145362377167, 0.0020383980590850115, 0.087734155356884, 0.011040030047297478, -0.05993311479687691, -0.05790332704782486, 0.01574312523007393, 0.009771298617124557, 0.022676382213830948, -0.009197148494422436, 0.03372732177376747, 0.08404259383678436, -0.015135225839912891, -0.04693703353404999, 
0.09917140752077103, 0.007134507410228252, 0.020209072157740593, -0.00027669535484164953, -0.0351635180413723, 0.03751315921545029, -0.019665181636810303, 0.028500953689217567, 0.034186746925115585, -0.005931361112743616, 0.05645192414522171, -0.02027188241481781, -0.022675039246678352, -0.08812297880649567, -0.014896178618073463, -0.048788342624902725, 0.008708382956683636, 0.019917558878660202, -0.002275944221764803, 0.03409638628363609, 0.033304013311862946, 0.057676300406455994, 0.039842985570430756, -0.025169866159558296, 0.016520975157618523, -0.030201178044080734, -0.021718870848417282, -0.07023277878761292, -0.007528252899646759, 0.009067370556294918, -0.0460657961666584, 0.07117785513401031, -0.03609836474061012, -0.011893372051417828, -0.006047600414603949, 0.0179970171302557, 0.024480223655700684, -0.03918423503637314, 0.004897980485111475, 0.05040167644619942, 0.010113563388586044, -0.1074901670217514, -0.06277655810117722, -0.02934161201119423, -0.06922926008701324, -0.05638887360692024, 0.05314395949244499, 0.04588884115219116], + "VIRT_REG_GR64_NOSP":[0.0015277941711246967, -0.03938478231430054, -0.030811766162514687, 0.027071669697761536, 0.02127140760421753, 0.0015787228476256132, -0.07842491567134857, 0.004658385645598173, -0.05909501388669014, -0.03576778993010521, -0.07251477241516113, 0.12117832154035568, 0.04499363154172897, -0.009405314922332764, -0.01015283353626728, -0.002841090550646186, 0.0689091831445694, 0.10697457194328308, -0.09274765104055405, -0.027955353260040283, -0.0379958301782608, -0.044126156717538834, 0.04907212778925896, -0.038063473999500275, -0.003686746582388878, -0.08313410729169846, -0.045181579887866974, -0.011702840216457844, -0.006579228211194277, 0.046807315200567245, -0.045654296875, -0.03466613590717316, -0.08313826471567154, -0.06678880006074905, -0.027727074921131134, 0.036734677851200104, -0.040936414152383804, 0.05170389637351036, 0.038199927657842636, 0.02960256300866604, 0.0355701707303524, 
-0.02052776888012886, 0.06218089163303375, 0.10570456087589264, -0.036479029804468155, -0.008999336510896683, -0.031860992312431335, 0.07250168174505234, -0.061084795743227005, -0.057996805757284164, -0.010533110238611698, -0.018169214949011803, 0.017261315137147903, 0.10023517906665802, -0.044131457805633545, -0.07618662714958191, -0.09124933928251266, 0.01819406822323799, -0.05906827375292778, 0.04295642301440239, -0.03197735920548439, 0.03641442954540253, 0.005168464966118336, -0.00010972691961796954, -0.0829579159617424, -0.014677388593554497, -0.08750011026859283, -0.04695136100053787, -0.07696729153394699, -0.00718996487557888, 0.018294518813490868, -0.014321570284664631, -0.04416860267519951, -0.0890057235956192, -0.014466283842921257, 0.02831638976931572, -0.04845190420746803, 0.08228176832199097, 0.03420877829194069, 0.056510377675294876, 0.037403274327516556, 0.04364967346191406, 0.08903267979621887, -0.016827082261443138, -0.0682789757847786, 0.06286796927452087, -0.0958203598856926, 0.018489282578229904, 0.02886355295777321, 0.028006011620163918, 0.039986785501241684, -0.04771937429904938, -0.004648604430258274, 0.033939141780138016, -0.027820419520139694, -0.026187442243099213, -0.07972361892461777, 0.006323353853076696, 0.016448041424155235, -0.01961681991815567], + "VIRT_REG_GR64_NOSP_and_GR64_TC":[0.08079065382480621, -0.05147358775138855, -0.08338657021522522, 0.06757336109876633, -0.015237463638186455, 0.026806311681866646, 0.07564966380596161, -0.037159934639930725, -0.02222878858447075, -0.04553138092160225, -0.006632891017943621, 0.001604291144758463, 0.043711669743061066, 0.0710049569606781, -0.08854726701974869, -0.03142566233873367, -0.0865127220749855, 0.08521236479282379, 0.039203498512506485, 0.04737624153494835, 0.02893459051847458, 0.004120660945773125, 0.03552098199725151, -0.0010448878165334463, 0.04423774778842926, 0.03258584439754486, 0.03433830663561821, -0.019990455359220505, -0.03263172507286072, 0.09782663732767105, 
-0.00702365068718791, -0.06544602662324905, 0.013447105884552002, 0.04603038728237152, 0.029931804165244102, 0.0988783910870552, -0.062023941427469254, -0.0070026409812271595, 0.032557111233472824, -0.08212000876665115, 0.03199682757258415, 0.020828546956181526, 0.07071725279092789, -0.018812179565429688, -0.0184739138931036, -0.06008931249380112, 0.01504000648856163, -0.019235603511333466, 0.014653048478066921, -0.009083813987672329, 0.03171474114060402, 0.019499456509947777, 0.05263463407754898, 0.10554639250040054, -0.02759619802236557, -0.00156494346447289, -0.03898271545767784, 0.06027846410870552, -0.061001915484666824, 0.039365388453006744, -0.06546281278133392, 0.0006352368509396911, 0.0500405877828598, -0.03232716768980026, -0.010176514275372028, 0.002549059921875596, 0.0666508674621582, -0.037290267646312714, -0.028836704790592194, 0.06271649152040482, -0.016647985205054283, 0.013602355495095253, 0.020110899582505226, 0.011730309575796127, -0.10071564465761185, -0.06239647418260574, -0.09507977962493896, -0.09190725535154343, -0.08861985802650452, -0.0006123466300778091, 0.0951915979385376, -0.035364676266908646, -0.04007220268249512, 0.08415472507476807, 0.0006664254469797015, 0.05864431709051132, 0.01460045762360096, -0.09507087618112564, 0.024228032678365707, 0.04208158329129219, 0.006106846500188112, 0.09294755011796951, 0.06157369166612625, 0.0826527327299118, -0.058974966406822205, -0.09958664327859879, 0.06913749873638153, -0.08108915388584137, 0.07425157725811005, 0.04784728214144707], + "VIRT_REG_GR64_TC":[-0.0944172665476799, 0.040403831750154495, -0.017597073689103127, 0.04766053333878517, -0.03104357235133648, 0.025751160457730293, 0.036779265850782394, -0.0235747080296278, 0.032111138105392456, 0.009872193448245525, -0.01596468687057495, 0.05234881862998009, -0.047335200011730194, 0.005157034378498793, -0.02132921665906906, -0.0544377863407135, 0.057515472173690796, -0.006743279751390219, -0.01474941335618496, -0.0990658849477768, 
0.022418741136789322, -0.007098495960235596, 0.046933863312006, 0.1002131924033165, 0.01583809033036232, 0.03995800018310547, -0.017743254080414772, -0.01684877835214138, 0.06543229520320892, 0.04597911611199379, 0.05365373566746712, -0.008774830959737301, -0.01341968309134245, -0.004754040390253067, 0.04739849269390106, 0.032378777861595154, -0.0020728895906358957, 0.03502136841416359, 0.05946416035294533, -0.06190952658653259, 0.01910495012998581, -0.023678753525018692, 0.012653682380914688, -0.06766874343156815, -0.0729866623878479, 0.0757005363702774, -0.027033904567360878, -0.06776778399944305, -0.010131776332855225, -0.06334701925516129, -0.04702980816364288, 0.06837917864322662, 0.002726735547184944, 0.04345812648534775, 0.04288078844547272, -0.06921732425689697, -0.07625382393598557, 0.037991974502801895, -0.04257906600832939, 0.06338586658239365, 0.05315309390425682, -0.02785014547407627, 0.04054750129580498, 0.06967299431562424, -0.07271680235862732, 0.0032969408202916384, -0.08254148811101913, 0.07269596308469772, -0.01827111467719078, 0.034775473177433014, 0.010106234811246395, 0.0389409065246582, 0.042805008590221405, -0.03822058066725731, 0.0668339803814888, -0.005216705612838268, -0.00022202919353730977, -0.0221820380538702, -0.027401722967624664, -0.045061662793159485, -0.05296671763062477, -0.0190189890563488, -0.002744461875408888, -0.04073096439242363, -0.06974441558122635, 0.05868958309292793, -0.06907399743795395, -0.026619713753461838, 0.015318086370825768, 0.035948701202869415, -0.08301021158695221, 0.03955607861280441, 0.028369972482323647, 0.0202812347561121, -0.12075140327215195, -0.039504438638687134, -0.03826067969202995, 0.01607581228017807, 0.02135113812983036, -0.08897850662469864], + "VIRT_REG_GR64_TC_with_sub_8bit":[0.00805664248764515, 0.06228634715080261, -0.005148644559085369, -0.025605352595448494, -0.04853198677301407, -0.018169978633522987, 0.008530518971383572, -0.1050964742898941, -0.08428415656089783, -0.014802628196775913, 
0.05918573588132858, 0.07529161125421524, 0.09815273433923721, -0.014188972301781178, 0.06676790118217468, 0.09496084600687027, -0.03843621164560318, -0.00740150036290288, -0.11988909542560577, -0.01781499572098255, -0.03719411790370941, -0.07447166740894318, 0.005513608455657959, -0.014381160028278828, 0.036786310374736786, -0.04839075356721878, -0.009440913796424866, 0.03984222561120987, -0.08096668124198914, 0.026751000434160233, 0.06400448083877563, 0.07998895645141602, 2.295125523232855e-05, 0.0266779325902462, -0.0030931613873690367, 0.05236855521798134, -0.010479471646249294, -0.011119752191007137, -0.06124376133084297, -0.019449712708592415, 0.03448517248034477, -0.04095051810145378, 0.01377212442457676, 0.09643338620662689, 0.021325431764125824, 0.06029453128576279, 0.048866767436265945, -0.03436344116926193, -0.043422505259513855, 0.03822150453925133, 0.004718889016658068, -0.04090931639075279, -0.04219569265842438, 0.019032739102840424, 0.06111171841621399, 0.04305591061711311, -0.0379939004778862, -0.03224434703588486, -0.06517905741930008, 0.002272483194246888, 0.09273418039083481, -0.028145847842097282, 0.01824336126446724, 0.00936606340110302, -0.07281909137964249, -0.028650810942053795, -0.060721538960933685, -0.09477518498897552, -0.0014060320099815726, 0.06919887661933899, -0.03463669493794441, 0.0026504716370254755, -0.0653621107339859, -0.02800566703081131, -0.02503957599401474, -0.060285311192274094, 0.014794053509831429, -0.08424058556556702, 0.0482206828892231, -0.07467620074748993, -0.09909844398498535, -0.06888734549283981, -0.0014173799427226186, -0.09022543579339981, 0.06461413204669952, 0.024526789784431458, -0.07400602847337723, -0.008816084824502468, 0.025513656437397003, 0.047476526349782944, -0.05981749668717384, 0.08338218182325363, 0.02657591737806797, 0.03547860309481621, -0.043622229248285294, 0.10129662603139877, 0.08802521973848343, -0.09759330749511719, 0.025680232793092728, 0.05964493378996849], + 
"VIRT_REG_GR64_with_sub_16bit_in_GR16_NOREX":[-0.03117012232542038, -0.02872271090745926, -0.039712607860565186, 0.03738812729716301, 0.030099159106612206, 0.00013636364019475877, -0.019107641652226448, -0.04186702147126198, -0.053099144250154495, -0.020432034507393837, -0.0004185919533483684, 0.010934959165751934, 0.036054231226444244, 0.03788067027926445, 0.05227302014827728, -0.034505825489759445, -0.08298061788082123, 0.0399160161614418, 0.03668724000453949, 0.014606554992496967, -0.0071771652437746525, 0.059049926698207855, -0.06330917030572891, 0.007379058748483658, -0.0750177726149559, -0.0423760749399662, -0.019386067986488342, -0.018436923623085022, -0.015116279944777489, 0.023602722212672234, 0.0533282607793808, -0.026401247829198837, 0.023750485852360725, -0.027648568153381348, -0.016443056985735893, 0.04291580244898796, -0.04391908273100853, 0.05113501846790314, -0.03743087872862816, 0.056367188692092896, 0.048130668699741364, -0.0230261143296957, 0.03358393907546997, -0.030188169330358505, 0.08421863615512848, 0.0033821314573287964, 0.03151029348373413, -0.042818162590265274, 0.04007953777909279, -0.0050337472930550575, 0.03335743024945259, -0.026563530787825584, 0.016440672799944878, -0.04272226244211197, -0.07304228097200394, 0.024836458265781403, -0.016342775896191597, -0.055494848638772964, -0.05826134234666824, 0.027478834614157677, 0.025981346145272255, -0.04745938256382942, 0.013695796020328999, -0.027888784185051918, 0.03769542649388313, -0.024486247450113297, 0.04720773920416832, -0.012697651982307434, -0.03559652715921402, 0.012948199175298214, -0.025600459426641464, 0.014954420737922192, -0.06651762872934341, 0.04277091473340988, -0.08291683346033096, 0.016881149262189865, 0.04145864024758339, -0.04162050038576126, -0.03363965451717377, -0.05018439516425133, 0.06321889907121658, -0.00871780700981617, 0.06867428869009018, 0.057975344359874725, 0.009704249911010265, 0.049075234681367874, -0.06111253425478935, 0.027943406254053116, 
0.03725599870085716, 0.032480716705322266, -0.01960119605064392, -0.0295172780752182, 0.014026675373315811, 0.056797921657562256, -0.031707022339105606, 0.0010152219329029322, -0.023705823346972466, -0.07695567607879639, 0.017504720017313957, -0.0020094760693609715], + "VIRT_REG_GR64_with_sub_8bit":[-0.011493992060422897, -0.027181852608919144, 0.022013556212186813, 0.05687474459409714, -0.03289574757218361, -0.04803529754281044, -0.04204253479838371, 0.044671084731817245, -0.0849028080701828, -0.09561576694250107, 0.03596775606274605, 0.027156801894307137, 0.05034027621150017, -0.006308000069111586, 0.012393618933856487, -0.048590339720249176, -0.049129705876111984, 0.059305012226104736, -0.10330235958099365, 0.00738809397444129, 0.03855152800679207, -0.03220852091908455, 0.05221837759017944, -0.01274650078266859, 0.024303985759615898, -0.05925533175468445, -0.015623844228684902, -0.025864524766802788, 0.009918035939335823, 0.004779431037604809, -0.02866589091718197, 0.006512579973787069, -0.037251196801662445, 0.005028596147894859, -0.011677909642457962, 0.051886074244976044, -0.03552602231502533, 0.011968757025897503, 0.00829426757991314, -0.06981230527162552, -0.029781555756926537, -0.012621275149285793, 0.08595969527959824, 0.08630531281232834, 0.10018875449895859, -0.054863955825567245, -0.044519901275634766, 0.0893385037779808, 0.04004377871751785, 0.003711731405928731, -0.021447300910949707, -0.08500636368989944, 0.0037281641270965338, 0.14561010897159576, 0.03993009030818939, 0.07621612399816513, 0.020513180643320084, 0.004926605150103569, -0.035578932613134384, 0.06101486086845398, -0.08422145247459412, -0.03511432558298111, 0.01537742093205452, -0.010146304965019226, -0.05133780837059021, -0.010472903028130531, -0.09726933389902115, -0.010570867918431759, -0.09348491579294205, 0.002129049738869071, -0.01265127956867218, 0.03504374623298645, -0.008679943159222603, -0.002507386729121208, -0.06586045026779175, -0.04775359109044075, -0.042809367179870605, 
0.08359787613153458, -0.0230431966483593, -0.015440763905644417, 0.0195400882512331, -0.0186530202627182, -0.03176320344209671, -0.019522372633218765, -0.02984560839831829, 0.024256182834506035, -0.07656785100698471, 0.03944750130176544, 0.016559945419430733, 0.007124909665435553, 0.08061631768941879, 0.08561833202838898, -0.018525447696447372, -0.0019649232272058725, -0.018469924107193947, -0.012311050668358803, -0.08448101580142975, 0.060216110199689865, 0.06368701905012131, -0.07110093533992767], + "VIRT_REG_GR8":[0.02255251444876194, 0.012649326585233212, 0.05363747105002403, -0.006129346787929535, 0.027027001604437828, 0.03703385218977928, -0.045294541865587234, -0.02489621751010418, 0.026587747037410736, -0.06228360906243324, 0.01547946222126484, 0.03494448587298393, 0.08276952058076859, -0.03470698744058609, 0.0036826131399720907, 0.04216131567955017, -0.04518325626850128, 0.09584730118513107, -0.09126991778612137, -0.11293632537126541, 0.0141398124396801, -0.05086163431406021, 0.0421922467648983, -0.0001364851341350004, 0.05821910500526428, -0.04154132679104805, 0.036521218717098236, -0.016718950122594833, 0.0773339569568634, 0.05134757608175278, -0.03728386387228966, -0.014684299007058144, 0.016949277371168137, 0.025767508894205093, -0.01573120802640915, 0.0343811996281147, 0.008209497667849064, 0.0011038129450753331, -0.06688684970140457, -0.08167136460542679, 0.03875276446342468, 0.08301592618227005, 0.023012684658169746, 0.07135005295276642, 0.008461466059088707, 0.004998552612960339, 0.02622731775045395, -0.09479465335607529, 0.014987453818321228, -0.008574756793677807, -0.008050303906202316, -0.005560623947530985, 0.04616820812225342, 0.11537269502878189, 0.032199542969465256, 0.05507092550396919, -0.053164780139923096, 0.012255114503204823, -0.01981479674577713, 0.06012535095214844, 0.043957680463790894, 0.02384384348988533, 0.04837791621685028, 0.04945961385965347, -0.1063770279288292, -0.07354240119457245, -0.08922741562128067, 
-0.026019031181931496, -0.08768662065267563, 0.09241457283496857, 0.03253300115466118, -0.018267929553985596, -0.04406850412487984, -0.05577726289629936, -0.05304105579853058, 0.016035545617341995, 0.05610279366374016, 0.06247573718428612, -0.019430609419941902, -0.017088554799556732, -0.022114543244242668, 0.07442588359117508, -0.017668865621089935, -0.02403153106570244, 0.006919574458152056, 0.05879344418644905, -0.0885634645819664, -0.016336753964424133, -0.024662213400006294, 0.029266972094774246, -0.04889025166630745, 0.042460259050130844, -0.013102580793201923, 0.023992935195565224, 0.024768078699707985, 0.047551900148391724, -0.02243787795305252, 0.05929713696241379, 0.03110451251268387, -0.00550821190699935], + "VIRT_REG_RFP80":[-0.04414765536785126, 0.05147779360413551, -0.035608600825071335, -0.03939598798751831, 0.0430026613175869, -0.03331028297543526, 0.015591064468026161, 0.01892651617527008, -0.011428372003138065, -0.06980786472558975, 0.06445881724357605, 0.1036338210105896, 0.01164929661899805, -0.07599718868732452, 0.022036561742424965, 0.10396245121955872, -0.041171155869960785, -0.07264886051416397, 0.00032837275648489594, 0.02848120965063572, -0.031889040023088455, 0.023848745971918106, -0.02298046089708805, -0.05559201166033745, 0.026687605306506157, 0.0565699003636837, -0.0134252505376935, 0.05494402348995209, -0.0584089457988739, 0.05422470346093178, -0.024360226467251778, 0.03570455685257912, 0.013681530021131039, -0.006910417694598436, 0.011886067688465118, 0.07619262486696243, 0.08147607743740082, 0.05824091285467148, 0.001224246108904481, -0.030463339760899544, -0.023527851328253746, 0.03078501485288143, -0.02225799672305584, -0.058049511164426804, 0.015403151512145996, 0.07900431007146835, 0.025944147258996964, 0.021455328911542892, 0.023985104635357857, -0.0327906534075737, 0.04195002466440201, -0.10313323140144348, -0.023333510383963585, -0.010316243395209312, -0.02042137086391449, 0.07474000751972198, 0.02313513681292534, 
-0.0030733307357877493, 0.06138097122311592, 0.005197131074965, -0.03222955763339996, 0.005364845506846905, -0.05313501134514809, 0.0013082564109936357, 0.025044983252882957, 0.0349799208343029, 0.09704083949327469, -0.017403649166226387, -0.03375721350312233, 0.05970870703458786, -0.021679691970348358, -0.04719642922282219, 0.024217652156949043, -0.06130526587367058, 0.004813425708562136, 0.07473690062761307, -0.039600174874067307, -0.009295261465013027, 0.05440402403473854, 0.04785943776369095, -0.04006686061620712, -0.020133933052420616, 0.00989031046628952, -0.054447200149297714, 0.06291327625513077, -0.01196430902928114, 0.0841275230050087, -0.05557875707745552, -0.0813804343342781, -0.0746457576751709, -0.024255990982055664, -0.048101916909217834, -0.014132879674434662, -0.013147399760782719, -0.009715595282614231, 0.08717820793390274, -0.04318689927458763, -0.0311901792883873, -0.017253845930099487, 0.005144816357642412], + "VIRT_REG_VR128":[0.08292517066001892, 0.053138989955186844, 0.0019234063802286983, -0.030035940930247307, 0.0821828693151474, -0.0540342852473259, 0.06449387222528458, -0.03985493257641792, 0.026820721104741096, 0.0352952741086483, -0.1056072935461998, 0.054804764688014984, 0.01685425080358982, 0.05867069214582443, 0.11665259301662445, -0.07655566930770874, 0.021201618015766144, 0.00927705504000187, -0.04723019897937775, 0.016582123935222626, -0.01160470675677061, -0.013075411319732666, 0.01054342370480299, -0.05403316020965576, 0.033609066158533096, -0.07971179485321045, 0.1005927175283432, -0.020655132830142975, -0.0036442605778574944, 0.018269486725330353, 0.036334097385406494, -0.06517180055379868, -0.028530113399028778, -0.03768114373087883, 0.10582506656646729, 0.011199450120329857, -0.06707775592803955, 0.02332702837884426, -0.014528930187225342, -0.09369251132011414, 0.069722481071949, 0.031001657247543335, 0.08032777905464172, -0.060744334012269974, 0.015131807886064053, 0.01935953088104725, -0.087028868496418, 
0.041773099452257156, 0.0381581112742424, -0.07518653571605682, 0.021307995542883873, -0.07350508868694305, -0.04699733853340149, -0.007377162110060453, 0.07836157828569412, 0.016066696494817734, -0.02160775288939476, -0.030519334599375725, -0.09255059063434601, 0.03597188740968704, -0.11260625720024109, -0.08602424710988998, 0.058293748646974564, -0.034749604761600494, 0.005541469436138868, -0.07924741506576538, -0.024103455245494843, 0.06047135218977928, 0.026729481294751167, 0.03493977710604668, -0.07453227788209915, -0.01716521382331848, 0.008985077030956745, -0.08075122535228729, 0.03353623300790787, -0.08125714957714081, 0.04245763644576073, 0.06520543247461319, 0.020550349727272987, -0.003161275526508689, -0.03491697832942009, -0.005496494937688112, 0.09021904319524765, -0.057418785989284515, 0.03494826331734657, -0.052578359842300415, -0.044952504336833954, 0.11770184338092804, -0.048565153032541275, -0.03815764561295509, 0.06020108237862587, -0.09397949278354645, 0.03820547088980675, 0.08039405196905136, 0.014751153998076916, 0.006572262849658728, 0.05658692866563797, 0.05043925344944, -0.0060436660423874855, -0.12018798291683197], + "VIRT_REG_VR256":[0.032775089144706726, 0.029240285977721214, 0.01821955479681492, 0.023595772683620453, -0.02587016113102436, -0.12190376222133636, 0.09720813482999802, 0.005780891049653292, -0.0581410676240921, 0.04817686229944229, -0.04627984017133713, 0.03618951886892319, -0.10393846780061722, 0.04380590096116066, 0.030101926997303963, -0.021811308339238167, 0.0012455569813027978, 0.06209835410118103, -0.08859474956989288, 0.0671553835272789, -0.006448917090892792, 0.0169842429459095, 0.031113164499402046, -0.07417412847280502, 0.05549546331167221, -0.013042094185948372, 0.0948401540517807, -0.07335975021123886, -0.03987044095993042, -0.005343804135918617, -0.08741248399019241, -0.08009110391139984, 0.005667346995323896, 0.03745159134268761, 0.019986214116215706, -0.03723142296075821, -0.0037649653386324644, 
0.005682446528226137, 0.0659727230668068, -0.002658356446772814, 0.07049102336168289, -0.01944110542535782, -0.014278342947363853, 0.04189611226320267, 0.0312303826212883, -0.046760618686676025, 0.040438465774059296, 0.054074693471193314, 0.07479880005121231, -0.016405146569013596, 0.027125591412186623, -0.04216836765408516, 0.0011189498472958803, -0.01471384521573782, -0.010250975377857685, -0.006412460468709469, -0.12170380353927612, 0.015495882369577885, -0.054699406027793884, 0.05955614894628525, 0.06753991544246674, -0.03688138723373413, 0.049010518938302994, -0.07614680379629135, 0.06504888087511063, -0.014145595952868462, 0.02210555598139763, 0.023598313331604004, 0.00511248828843236, 0.013318972662091255, -0.11605404317378998, -0.032067783176898956, -0.05010659247636795, -0.023693162947893143, 0.06650379300117493, -0.026386691257357597, 0.06052805855870247, 0.0515507273375988, 0.033960308879613876, -0.06421340256929398, -0.09355985373258591, -0.0658700093626976, 0.10278744995594025, -0.10271084308624268, -0.012089421041309834, -0.04169749841094017, -0.07112454622983932, -0.032573599368333817, -0.0003141233173664659, 0.017007946968078613, 0.03622191399335861, 0.05829676240682602, 0.06261610984802246, 0.005667738616466522, 0.009631159715354443, 0.022852277383208275, 0.057013869285583496, -0.05015721917152405, 0.027599012479186058, -0.08637165278196335] + }, + "PhysicalRegisters" : { + "PHY_REG":[-0.008169060572981834, -0.017023155465722084, -0.04927198588848114, 0.0014261528849601746, 0.012259463779628277, -0.02794509381055832, -0.024857040494680405, 0.029203711077570915, 0.0433109886944294, 0.009679347276687622, -0.05811547115445137, -0.09075025469064713, -0.08525611460208893, -0.10545054078102112, 0.06474080681800842, 0.056396666914224625, 0.06781823933124542, 0.09059076011180878, -0.10420752316713333, -0.08284831047058105, 0.02349182404577732, -0.0354253351688385, -0.004627702757716179, 0.0068538435734808445, -0.053724177181720734, -0.02113335393369198, 
0.05254676192998886, -0.050769440829753876, 0.061386119574308395, -0.07541731745004654, -0.024204161018133163, -0.0009893826209008694, -0.007493770215660334, -0.017051052302122116, 0.015025814063847065, -0.020427946001291275, -0.0844966471195221, 0.04589429497718811, 0.025571472942829132, -0.05280151963233948, 0.06895384937524796, 0.03960262984037399, 0.0068003153428435326, 0.09397424012422562, -0.0523529127240181, 0.03780638054013252, -0.015423302538692951, 0.029167350381612778, 0.01019437238574028, 0.023989612236618996, -0.03344425559043884, -0.07926471531391144, -0.09238854795694351, 0.04794330149888992, 0.01872367039322853, -0.029179377481341362, -0.05339968949556351, -0.04575541242957115, -0.004491546656936407, -0.009650425054132938, 0.026945313438773155, -0.02115861512720585, 0.06488905847072601, -0.06647083908319473, 0.008904196321964264, 0.010536684654653072, -0.06012551859021187, -0.00022655133216176182, -0.10175421833992004, 0.062001921236515045, -0.054452817887067795, 0.01785552129149437, -0.06749527156352997, -0.04883178323507309, -0.023449009284377098, 0.040745027363300323, 0.002448269398882985, 0.07842953503131866, -0.019806355237960815, -0.08275315910577774, 0.01131721492856741, 0.0482926219701767, 0.01892486959695816, 0.005685009527951479, -0.0055344682186841965, -0.0034555341117084026, -0.07923021167516708, 0.06387833505868912, 0.05978211387991905, -0.001252106623724103, 0.07216084003448486, -0.01223798282444477, 0.09716741740703583, 0.009659498929977417, -0.09404221922159195, -0.10122949630022049, -0.003581057768315077, 0.07885389029979706, 0.05305042862892151, -0.04988719895482063] } }
\ No newline at end of file diff --git a/llvm/lib/AsmParser/AsmParserContext.cpp b/llvm/lib/AsmParser/AsmParserContext.cpp new file mode 100644 index 0000000..59d3ffc --- /dev/null +++ b/llvm/lib/AsmParser/AsmParserContext.cpp @@ -0,0 +1,89 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/AsmParser/AsmParserContext.h" + +namespace llvm { + +std::optional<FileLocRange> +AsmParserContext::getFunctionLocation(const Function *F) const { + if (auto FIt = Functions.find(F); FIt != Functions.end()) + return FIt->second; + return std::nullopt; +} + +std::optional<FileLocRange> +AsmParserContext::getBlockLocation(const BasicBlock *BB) const { + if (auto BBIt = Blocks.find(BB); BBIt != Blocks.end()) + return BBIt->second; + return std::nullopt; +} + +std::optional<FileLocRange> +AsmParserContext::getInstructionLocation(const Instruction *I) const { + if (auto IIt = Instructions.find(I); IIt != Instructions.end()) + return IIt->second; + return std::nullopt; +} + +Function * +AsmParserContext::getFunctionAtLocation(const FileLocRange &Query) const { + for (auto &[F, Loc] : Functions) { + if (Loc.contains(Query)) + return F; + } + return nullptr; +} + +Function *AsmParserContext::getFunctionAtLocation(const FileLoc &Query) const { + return getFunctionAtLocation(FileLocRange(Query, Query)); +} + +BasicBlock * +AsmParserContext::getBlockAtLocation(const FileLocRange &Query) const { + for (auto &[BB, Loc] : Blocks) { + if (Loc.contains(Query)) + return BB; + } + return nullptr; +} + +BasicBlock *AsmParserContext::getBlockAtLocation(const FileLoc &Query) const { + return getBlockAtLocation(FileLocRange(Query, Query)); +} + +Instruction * 
+AsmParserContext::getInstructionAtLocation(const FileLocRange &Query) const { + for (auto &[I, Loc] : Instructions) { + if (Loc.contains(Query)) + return I; + } + return nullptr; +} + +Instruction * +AsmParserContext::getInstructionAtLocation(const FileLoc &Query) const { + return getInstructionAtLocation(FileLocRange(Query, Query)); +} + +bool AsmParserContext::addFunctionLocation(Function *F, + const FileLocRange &Loc) { + return Functions.insert({F, Loc}).second; +} + +bool AsmParserContext::addBlockLocation(BasicBlock *BB, + const FileLocRange &Loc) { + return Blocks.insert({BB, Loc}).second; +} + +bool AsmParserContext::addInstructionLocation(Instruction *I, + const FileLocRange &Loc) { + return Instructions.insert({I, Loc}).second; +} + +} // namespace llvm diff --git a/llvm/lib/AsmParser/CMakeLists.txt b/llvm/lib/AsmParser/CMakeLists.txt index 20d0c50..dcfcc06 100644 --- a/llvm/lib/AsmParser/CMakeLists.txt +++ b/llvm/lib/AsmParser/CMakeLists.txt @@ -1,5 +1,6 @@ # AsmParser add_llvm_component_library(LLVMAsmParser + AsmParserContext.cpp LLLexer.cpp LLParser.cpp Parser.cpp diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 50d1d47..7a6c19e 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -191,6 +191,8 @@ int LLLexer::getNextChar() { } lltok::Kind LLLexer::LexToken() { + // Set token end to next location, since the end is exclusive. + PrevTokEnd = CurPtr; while (true) { TokStart = CurPtr; diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index f71a534..5164cec 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -752,14 +752,21 @@ bool LLParser::parseDeclare() { /// ::= 'define' FunctionHeader (!dbg !56)* '{' ... 
bool LLParser::parseDefine() { assert(Lex.getKind() == lltok::kw_define); + FileLoc FunctionStart(Lex.getTokLineColumnPos()); Lex.Lex(); Function *F; unsigned FunctionNumber = -1; SmallVector<unsigned> UnnamedArgNums; - return parseFunctionHeader(F, true, FunctionNumber, UnnamedArgNums) || - parseOptionalFunctionMetadata(*F) || - parseFunctionBody(*F, FunctionNumber, UnnamedArgNums); + bool RetValue = + parseFunctionHeader(F, true, FunctionNumber, UnnamedArgNums) || + parseOptionalFunctionMetadata(*F) || + parseFunctionBody(*F, FunctionNumber, UnnamedArgNums); + if (ParserContext) + ParserContext->addFunctionLocation( + F, FileLocRange(FunctionStart, Lex.getPrevTokEndLineColumnPos())); + + return RetValue; } /// parseGlobalType @@ -7018,6 +7025,8 @@ bool LLParser::parseFunctionBody(Function &Fn, unsigned FunctionNumber, /// parseBasicBlock /// ::= (LabelStr|LabelID)? Instruction* bool LLParser::parseBasicBlock(PerFunctionState &PFS) { + FileLoc BBStart(Lex.getTokLineColumnPos()); + // If this basic block starts out with a name, remember it. std::string Name; int NameID = -1; @@ -7059,6 +7068,7 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) { TrailingDbgRecord.emplace_back(DR, DeleteDbgRecord); } + FileLoc InstStart(Lex.getTokLineColumnPos()); // This instruction may have three possibilities for a name: a) none // specified, b) name specified "%foo =", c) number specified: "%4 =". 
LocTy NameLoc = Lex.getLoc(); @@ -7108,8 +7118,16 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) { for (DbgRecordPtr &DR : TrailingDbgRecord) BB->insertDbgRecordBefore(DR.release(), Inst->getIterator()); TrailingDbgRecord.clear(); + if (ParserContext) { + ParserContext->addInstructionLocation( + Inst, FileLocRange(InstStart, Lex.getPrevTokEndLineColumnPos())); + } } while (!Inst->isTerminator()); + if (ParserContext) + ParserContext->addBlockLocation( + BB, FileLocRange(BBStart, Lex.getPrevTokEndLineColumnPos())); + assert(TrailingDbgRecord.empty() && "All debug values should have been attached to an instruction."); diff --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp index 07fdce9..c5346d0 100644 --- a/llvm/lib/AsmParser/Parser.cpp +++ b/llvm/lib/AsmParser/Parser.cpp @@ -24,33 +24,38 @@ using namespace llvm; static bool parseAssemblyInto(MemoryBufferRef F, Module *M, ModuleSummaryIndex *Index, SMDiagnostic &Err, SlotMapping *Slots, bool UpgradeDebugInfo, - DataLayoutCallbackTy DataLayoutCallback) { + DataLayoutCallbackTy DataLayoutCallback, + AsmParserContext *ParserContext = nullptr) { SourceMgr SM; std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(F); SM.AddNewSourceBuffer(std::move(Buf), SMLoc()); std::optional<LLVMContext> OptContext; return LLParser(F.getBuffer(), SM, Err, M, Index, - M ? M->getContext() : OptContext.emplace(), Slots) + M ? 
M->getContext() : OptContext.emplace(), Slots, + ParserContext) .Run(UpgradeDebugInfo, DataLayoutCallback); } bool llvm::parseAssemblyInto(MemoryBufferRef F, Module *M, ModuleSummaryIndex *Index, SMDiagnostic &Err, SlotMapping *Slots, - DataLayoutCallbackTy DataLayoutCallback) { + DataLayoutCallbackTy DataLayoutCallback, + AsmParserContext *ParserContext) { return ::parseAssemblyInto(F, M, Index, Err, Slots, - /*UpgradeDebugInfo*/ true, DataLayoutCallback); + /*UpgradeDebugInfo*/ true, DataLayoutCallback, + ParserContext); } std::unique_ptr<Module> llvm::parseAssembly(MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context, - SlotMapping *Slots, - DataLayoutCallbackTy DataLayoutCallback) { + SlotMapping *Slots, DataLayoutCallbackTy DataLayoutCallback, + AsmParserContext *ParserContext) { std::unique_ptr<Module> M = std::make_unique<Module>(F.getBufferIdentifier(), Context); - if (parseAssemblyInto(F, M.get(), nullptr, Err, Slots, DataLayoutCallback)) + if (parseAssemblyInto(F, M.get(), nullptr, Err, Slots, DataLayoutCallback, + ParserContext)) return nullptr; return M; @@ -133,12 +138,14 @@ ParsedModuleAndIndex llvm::parseAssemblyFileWithIndexNoUpgradeDebugInfo( DataLayoutCallback); } -std::unique_ptr<Module> llvm::parseAssemblyString(StringRef AsmString, - SMDiagnostic &Err, - LLVMContext &Context, - SlotMapping *Slots) { +std::unique_ptr<Module> +llvm::parseAssemblyString(StringRef AsmString, SMDiagnostic &Err, + LLVMContext &Context, SlotMapping *Slots, + AsmParserContext *ParserContext) { MemoryBufferRef F(AsmString, "<string>"); - return parseAssembly(F, Err, Context, Slots); + return parseAssembly( + F, Err, Context, Slots, [](StringRef, StringRef) { return std::nullopt; }, + ParserContext); } static bool parseSummaryIndexAssemblyInto(MemoryBufferRef F, diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index cf7efbfa..466dcb0 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ 
b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -8603,7 +8603,7 @@ getEnableSplitLTOUnitAndUnifiedFlag(BitstreamCursor &Stream, case bitc::FS_FLAGS: { // [flags] uint64_t Flags = Record[0]; // Scan flags. - assert(Flags <= 0x2ff && "Unexpected bits in flag"); + assert(Flags <= 0x7ff && "Unexpected bits in flag"); bool EnableSplitLTOUnit = Flags & 0x8; bool UnifiedLTO = Flags & 0x200; diff --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp index b1cd939..aeb4a4d 100644 --- a/llvm/lib/CGData/CodeGenDataReader.cpp +++ b/llvm/lib/CGData/CodeGenDataReader.cpp @@ -125,7 +125,7 @@ Error IndexedCodeGenDataReader::read() { FunctionMapRecord.setReadStableFunctionMapNames( IndexedCodeGenDataReadFunctionMapNames); if (IndexedCodeGenDataLazyLoading) - FunctionMapRecord.lazyDeserialize(SharedDataBuffer, + FunctionMapRecord.lazyDeserialize(std::move(SharedDataBuffer), Header.StableFunctionMapOffset); else FunctionMapRecord.deserialize(Ptr); diff --git a/llvm/lib/CGData/StableFunctionMap.cpp b/llvm/lib/CGData/StableFunctionMap.cpp index 46e04bd..d0fae3a 100644 --- a/llvm/lib/CGData/StableFunctionMap.cpp +++ b/llvm/lib/CGData/StableFunctionMap.cpp @@ -137,6 +137,7 @@ size_t StableFunctionMap::size(SizeType Type) const { const StableFunctionMap::StableFunctionEntries & StableFunctionMap::at(HashFuncsMapType::key_type FunctionHash) const { auto It = HashToFuncs.find(FunctionHash); + assert(It != HashToFuncs.end() && "FunctionHash not found!"); if (isLazilyLoaded()) deserializeLazyLoadingEntry(It); return It->second.Entries; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index fefde64f..8aa488f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -41,6 +41,7 @@ #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include 
"llvm/CodeGen/MachineBlockHashInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" @@ -184,6 +185,8 @@ static cl::opt<bool> PrintLatency( cl::desc("Print instruction latencies as verbose asm comments"), cl::Hidden, cl::init(false)); +extern cl::opt<bool> EmitBBHash; + STATISTIC(EmittedInsts, "Number of machine instrs printed"); char AsmPrinter::ID = 0; @@ -474,6 +477,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<GCModuleInfo>(); AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); + if (EmitBBHash) + AU.addRequired<MachineBlockHashInfo>(); } bool AsmPrinter::doInitialization(Module &M) { @@ -1434,14 +1439,11 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges, "BB entries info is required for BBFreq and BrProb " "features"); } - return {FuncEntryCountEnabled, - BBFreqEnabled, - BrProbEnabled, + return {FuncEntryCountEnabled, BBFreqEnabled, BrProbEnabled, MF.hasBBSections() && NumMBBSectionRanges > 1, // Use static_cast to avoid breakage of tests on windows. - static_cast<bool>(BBAddrMapSkipEmitBBEntries), - HasCalls, - false}; + static_cast<bool>(BBAddrMapSkipEmitBBEntries), HasCalls, + static_cast<bool>(EmitBBHash)}; } void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { @@ -1500,6 +1502,9 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { PrevMBBEndSymbol = MBBSymbol; } + auto MBHI = + Features.BBHash ? &getAnalysis<MachineBlockHashInfo>() : nullptr; + if (!Features.OmitBBEntries) { OutStreamer->AddComment("BB id"); // Emit the BB ID for this basic block. @@ -1527,6 +1532,10 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), CurrentLabel); // Emit the Metadata. OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB)); + // Emit the Hash. 
+ if (MBHI) { + OutStreamer->emitInt64(MBHI->getMBBHash(MBB)); + } } PrevMBBEndSymbol = MBB.getEndSymbol(); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 9288d7e..9c0b68b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -334,7 +334,7 @@ public: const DIE &TyDIE); protected: - ~DwarfUnit(); + ~DwarfUnit() override; /// Create new static data member DIE. DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT); diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index b6872605..4373c53 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -108,6 +108,7 @@ add_llvm_component_library(LLVMCodeGen LowerEmuTLS.cpp MachineBasicBlock.cpp MachineBlockFrequencyInfo.cpp + MachineBlockHashInfo.cpp MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp MachineCFGPrinter.cpp diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 9e78ec9..8ea1326 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -4030,7 +4030,6 @@ bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const { /// if it is simplified. class SimplificationTracker { DenseMap<Value *, Value *> Storage; - const SimplifyQuery &SQ; // Tracks newly created Phi nodes. The elements are iterated by insertion // order. 
PhiNodeSet AllPhiNodes; @@ -4038,8 +4037,6 @@ class SimplificationTracker { SmallPtrSet<SelectInst *, 32> AllSelectNodes; public: - SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {} - Value *Get(Value *V) { do { auto SV = Storage.find(V); @@ -4049,30 +4046,6 @@ public: } while (true); } - Value *Simplify(Value *Val) { - SmallVector<Value *, 32> WorkList; - SmallPtrSet<Value *, 32> Visited; - WorkList.push_back(Val); - while (!WorkList.empty()) { - auto *P = WorkList.pop_back_val(); - if (!Visited.insert(P).second) - continue; - if (auto *PI = dyn_cast<Instruction>(P)) - if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) { - for (auto *U : PI->users()) - WorkList.push_back(cast<Value>(U)); - Put(PI, V); - PI->replaceAllUsesWith(V); - if (auto *PHI = dyn_cast<PHINode>(PI)) - AllPhiNodes.erase(PHI); - if (auto *Select = dyn_cast<SelectInst>(PI)) - AllSelectNodes.erase(Select); - PI->eraseFromParent(); - } - } - return Get(Val); - } - void Put(Value *From, Value *To) { Storage.insert({From, To}); } void ReplacePhi(PHINode *From, PHINode *To) { @@ -4133,8 +4106,7 @@ private: /// Common Type for all different fields in addressing modes. Type *CommonType = nullptr; - /// SimplifyQuery for simplifyInstruction utility. - const SimplifyQuery &SQ; + const DataLayout &DL; /// Original Address. Value *Original; @@ -4143,8 +4115,8 @@ private: Value *CommonValue = nullptr; public: - AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue) - : SQ(_SQ), Original(OriginalValue) {} + AddressingModeCombiner(const DataLayout &DL, Value *OriginalValue) + : DL(DL), Original(OriginalValue) {} ~AddressingModeCombiner() { eraseCommonValueIfDead(); } @@ -4256,7 +4228,7 @@ private: // Keep track of keys where the value is null. We will need to replace it // with constant null when we know the common type. 
SmallVector<Value *, 2> NullValue; - Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); + Type *IntPtrTy = DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); for (auto &AM : AddrModes) { Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy); if (DV) { @@ -4306,7 +4278,7 @@ private: // simplification is possible only if original phi/selects were not // simplified yet. // Using this mapping we can find the current value in AddrToBase. - SimplificationTracker ST(SQ); + SimplificationTracker ST; // First step, DFS to create PHI nodes for all intermediate blocks. // Also fill traverse order for the second step. @@ -4465,7 +4437,6 @@ private: PHI->addIncoming(ST.Get(Map[PV]), B); } } - Map[Current] = ST.Simplify(V); } } @@ -5856,8 +5827,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the graph are compatible. bool PhiOrSelectSeen = false; SmallVector<Instruction *, 16> AddrModeInsts; - const SimplifyQuery SQ(*DL, TLInfo); - AddressingModeCombiner AddrModes(SQ, Addr); + AddressingModeCombiner AddrModes(*DL, Addr); TypePromotionTransaction TPT(RemovedInsts); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp index 2b5ced3..f44eb22 100644 --- a/llvm/lib/CodeGen/ExpandFp.cpp +++ b/llvm/lib/CodeGen/ExpandFp.cpp @@ -1108,8 +1108,8 @@ public: }; } // namespace -ExpandFpPass::ExpandFpPass(const TargetMachine *TM, CodeGenOptLevel OptLevel) - : TM(TM), OptLevel(OptLevel) {} +ExpandFpPass::ExpandFpPass(const TargetMachine &TM, CodeGenOptLevel OptLevel) + : TM(&TM), OptLevel(OptLevel) {} void ExpandFpPass::printPipeline( raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index 2cba6f0..0665437 100644 --- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ 
b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -62,7 +62,7 @@ public: static std::unique_ptr<WorkListMaintainer> create(Level Lvl, WorkListTy &WorkList, MachineRegisterInfo &MRI); - virtual ~WorkListMaintainer() = default; + ~WorkListMaintainer() override = default; void reportFullyCreatedInstrs() { LLVM_DEBUG({ @@ -95,7 +95,7 @@ public: WorkListMaintainerImpl(WorkListTy &WorkList, MachineRegisterInfo &MRI) : WorkList(WorkList), MRI(MRI) {} - virtual ~WorkListMaintainerImpl() = default; + ~WorkListMaintainerImpl() override = default; void reset() override { DeferList.clear(); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index b425b95..1f10478 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -391,19 +391,6 @@ void CombinerHelper::applyCombineConcatVectors( MI.eraseFromParent(); } -bool CombinerHelper::matchCombineShuffleToBuildVector(MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && - "Invalid instruction"); - auto &Shuffle = cast<GShuffleVector>(MI); - - Register SrcVec1 = Shuffle.getSrc1Reg(); - Register SrcVec2 = Shuffle.getSrc2Reg(); - - LLT SrcVec1Type = MRI.getType(SrcVec1); - LLT SrcVec2Type = MRI.getType(SrcVec2); - return SrcVec1Type.isVector() && SrcVec2Type.isVector(); -} - void CombinerHelper::applyCombineShuffleToBuildVector(MachineInstr &MI) const { auto &Shuffle = cast<GShuffleVector>(MI); @@ -535,11 +522,9 @@ bool CombinerHelper::matchCombineShuffleVector( LLT DstType = MRI.getType(MI.getOperand(0).getReg()); Register Src1 = MI.getOperand(1).getReg(); LLT SrcType = MRI.getType(Src1); - // As bizarre as it may look, shuffle vector can actually produce - // scalar! This is because at the IR level a <1 x ty> shuffle - // vector is perfectly valid. - unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1; - unsigned SrcNumElts = SrcType.isVector() ? 
SrcType.getNumElements() : 1; + + unsigned DstNumElts = DstType.getNumElements(); + unsigned SrcNumElts = SrcType.getNumElements(); // If the resulting vector is smaller than the size of the source // vectors being concatenated, we won't be able to replace the @@ -556,7 +541,7 @@ bool CombinerHelper::matchCombineShuffleVector( // // TODO: If the size between the source and destination don't match // we could still emit an extract vector element in that case. - if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1) + if (DstNumElts < 2 * SrcNumElts) return false; // Check that the shuffle mask can be broken evenly between the @@ -619,39 +604,6 @@ void CombinerHelper::applyCombineShuffleVector( MI.eraseFromParent(); } -bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && - "Invalid instruction kind"); - - ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); - return Mask.size() == 1; -} - -void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) const { - Register DstReg = MI.getOperand(0).getReg(); - Builder.setInsertPt(*MI.getParent(), MI); - - int I = MI.getOperand(3).getShuffleMask()[0]; - Register Src1 = MI.getOperand(1).getReg(); - LLT Src1Ty = MRI.getType(Src1); - int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1; - Register SrcReg; - if (I >= Src1NumElts) { - SrcReg = MI.getOperand(2).getReg(); - I -= Src1NumElts; - } else if (I >= 0) - SrcReg = Src1; - - if (I < 0) - Builder.buildUndef(DstReg); - else if (!MRI.getType(SrcReg).isVector()) - Builder.buildCopy(DstReg, SrcReg); - else - Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I); - - MI.eraseFromParent(); -} - namespace { /// Select a preference between two uses. CurrentUse is the current preference @@ -8369,7 +8321,7 @@ bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI, return false; ArrayRef<int> Mask = Shuffle.getMask(); - const unsigned NumSrcElems = Src1Ty.isVector() ? 
Src1Ty.getNumElements() : 1; + const unsigned NumSrcElems = Src1Ty.getNumElements(); bool TouchesSrc1 = false; bool TouchesSrc2 = false; diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 04d9309..d6f23b6 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -602,6 +602,8 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, Depth + 1); computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts, Depth + 1); + OffsetKnown = OffsetKnown.sext(BitWidth); + WidthKnown = WidthKnown.sext(BitWidth); Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown); // Sign extend the extracted value using shift left and arithmetic shift // right. diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 884c3f1..1fc90d0 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -139,7 +139,7 @@ class DILocationVerifier : public GISelChangeObserver { public: DILocationVerifier() = default; - ~DILocationVerifier() = default; + ~DILocationVerifier() override = default; const Instruction *getCurrentInst() const { return CurrInst; } void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; } @@ -1862,15 +1862,19 @@ bool IRTranslator::translateVectorDeinterleave2Intrinsic( void IRTranslator::getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder) { + Value *Global = TLI->getSDagStackGuard(*MF->getFunction().getParent()); + if (!Global) { + LLVMContext &Ctx = MIRBuilder.getContext(); + Ctx.diagnose(DiagnosticInfoGeneric("unable to lower stackguard")); + MIRBuilder.buildUndef(DstReg); + return; + } + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); MRI->setRegClass(DstReg, TRI->getPointerRegClass()); auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, 
{DstReg}, {}); - Value *Global = TLI->getSDagStackGuard(*MF->getFunction().getParent()); - if (!Global) - return; - unsigned AddrSpace = Global->getType()->getPointerAddressSpace(); LLT PtrTy = LLT::pointer(AddrSpace, DL->getPointerSizeInBits(AddrSpace)); @@ -3355,6 +3359,54 @@ bool IRTranslator::translateShuffleVector(const User &U, Mask = SVI->getShuffleMask(); else Mask = cast<ConstantExpr>(U).getShuffleMask(); + + // As GISel does not represent <1 x > vectors as a separate type from scalars, + // we transform shuffle_vector with a scalar output to an + // ExtractVectorElement. If the input type is also scalar it becomes a Copy. + unsigned DstElts = cast<FixedVectorType>(U.getType())->getNumElements(); + unsigned SrcElts = + cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements(); + if (DstElts == 1) { + unsigned M = Mask[0]; + if (SrcElts == 1) { + if (M == 0 || M == 1) + return translateCopy(U, *U.getOperand(M), MIRBuilder); + MIRBuilder.buildUndef(getOrCreateVReg(U)); + } else { + Register Dst = getOrCreateVReg(U); + if (M < SrcElts) { + MIRBuilder.buildExtractVectorElementConstant( + Dst, getOrCreateVReg(*U.getOperand(0)), M); + } else if (M < SrcElts * 2) { + MIRBuilder.buildExtractVectorElementConstant( + Dst, getOrCreateVReg(*U.getOperand(1)), M - SrcElts); + } else { + MIRBuilder.buildUndef(Dst); + } + } + return true; + } + + // A single element src is transformed to a build_vector. 
+ if (SrcElts == 1) { + SmallVector<Register> Ops; + Register Undef; + for (int M : Mask) { + LLT SrcTy = getLLTForType(*U.getOperand(0)->getType(), *DL); + if (M == 0 || M == 1) { + Ops.push_back(getOrCreateVReg(*U.getOperand(M))); + } else { + if (!Undef.isValid()) { + Undef = MRI->createGenericVirtualRegister(SrcTy); + MIRBuilder.buildUndef(Undef); + } + Ops.push_back(Undef); + } + } + MIRBuilder.buildBuildVector(getOrCreateVReg(U), Ops); + return true; + } + ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask); MIRBuilder .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)}, diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 38ec83f..178529f 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4748,6 +4748,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case G_FMINIMUMNUM: case G_FMAXIMUMNUM: return lowerFMinNumMaxNum(MI); + case G_FMINIMUM: + case G_FMAXIMUM: + return lowerFMinimumMaximum(MI); case G_MERGE_VALUES: return lowerMergeValues(MI); case G_UNMERGE_VALUES: @@ -5819,6 +5822,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( } else if (InputUsed[0] == -1U) { // No input vectors were used! The result is undefined. Output = MIRBuilder.buildUndef(NarrowTy).getReg(0); + } else if (NewElts == 1) { + Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0); } else { Register Op0 = Inputs[InputUsed[0]]; // If only one input was used, use an undefined vector for the other. 
@@ -8775,6 +8780,77 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + auto [Dst, Src0, Src1] = MI.getFirst3Regs(); + LLT Ty = MRI.getType(Dst); + LLT CmpTy = Ty.changeElementSize(1); + + bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM); + unsigned OpcIeee = + IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE; + unsigned OpcNonIeee = + IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM; + bool MinMaxMustRespectOrderedZero = false; + Register Res; + + // IEEE variants don't need canonicalization + if (LI.isLegalOrCustom({OpcIeee, Ty})) { + Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0); + MinMaxMustRespectOrderedZero = true; + } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) { + Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0); + } else { + auto Compare = MIRBuilder.buildFCmp( + IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1); + Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0); + } + + // Propagate any NaN of both operands + if (!MI.getFlag(MachineInstr::FmNoNans) && + (!isKnownNeverNaN(Src0, MRI) || isKnownNeverNaN(Src1, MRI))) { + auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1); + + LLT ElementTy = Ty.isScalar() ? 
Ty : Ty.getElementType(); + APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy)); + Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0); + if (Ty.isVector()) + NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0); + + Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0); + } + + // fminimum/fmaximum requires -0.0 less than +0.0 + if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) { + GISelValueTracking VT(MIRBuilder.getMF()); + KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero); + KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero); + + if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) { + const unsigned Flags = MI.getFlags(); + Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0); + auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero); + + unsigned TestClass = IsMax ? fcPosZero : fcNegZero; + + auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass); + auto LHSSelect = + MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags); + + auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass); + auto RHSSelect = + MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags); + + Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0); + } + } + + MIRBuilder.buildCopy(Dst, Res); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c Register DstReg = MI.getOperand(0).getReg(); @@ -9016,22 +9092,18 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { continue; } - if (Src0Ty.isScalar()) { - BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg); - } else { - int NumElts = Src0Ty.getNumElements(); - Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg; - int ExtractIdx = Idx < NumElts ? 
Idx : Idx - NumElts; - auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); - auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK); - BuildVec.push_back(Extract.getReg(0)); - } + assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR"); + + int NumElts = Src0Ty.getNumElements(); + Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg; + int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts; + auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); + auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK); + BuildVec.push_back(Extract.getReg(0)); } - if (DstTy.isVector()) - MIRBuilder.buildBuildVector(DstReg, BuildVec); - else - MIRBuilder.buildCopy(DstReg, BuildVec[0]); + assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR"); + MIRBuilder.buildBuildVector(DstReg, BuildVec); MI.eraseFromParent(); return Legalized; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 27df7e3..4b4df98 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -800,10 +800,11 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, LLT DstTy = Res.getLLTTy(*getMRI()); LLT Src1Ty = Src1.getLLTTy(*getMRI()); LLT Src2Ty = Src2.getLLTTy(*getMRI()); - const LLT DstElemTy = DstTy.isVector() ? DstTy.getElementType() : DstTy; - const LLT ElemTy1 = Src1Ty.isVector() ? Src1Ty.getElementType() : Src1Ty; - const LLT ElemTy2 = Src2Ty.isVector() ? 
Src2Ty.getElementType() : Src2Ty; + const LLT DstElemTy = DstTy.getScalarType(); + const LLT ElemTy1 = Src1Ty.getScalarType(); + const LLT ElemTy2 = Src2Ty.getScalarType(); assert(DstElemTy == ElemTy1 && DstElemTy == ElemTy2); + assert(Mask.size() > 1 && "Scalar G_SHUFFLE_VECTOR are not supported"); (void)DstElemTy; (void)ElemTy1; (void)ElemTy2; diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index d6e8505..c3e0964 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -721,6 +721,9 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { // Allocate a new register for the remat. Register NewVReg = Edit->createFrom(Original); + // Constrain it to the register class of MI. + MRI.constrainRegClass(NewVReg, MRI.getRegClass(VirtReg.reg())); + // Finally we can rematerialize OrigMI before MI. SlotIndex DefIdx = Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI); diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp index b655375..94e3a82 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp @@ -69,7 +69,7 @@ public: static char ID; LiveDebugValuesLegacy(); - ~LiveDebugValuesLegacy() = default; + ~LiveDebugValuesLegacy() override = default; /// Calculate the liveness information for the given machine function. bool runOnMachineFunction(MachineFunction &MF) override; diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index b9ea03f..1c4b2f9 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -1094,7 +1094,7 @@ public: /// Default construct and initialize the pass. VarLocBasedLDV(); - ~VarLocBasedLDV(); + ~VarLocBasedLDV() override; /// Print to ostream with a message. 
void printVarLocInMBB(const MachineFunction &MF, const VarLocInMBB &V, diff --git a/llvm/lib/CodeGen/MIR2Vec.cpp b/llvm/lib/CodeGen/MIR2Vec.cpp index 99be1fc0..00b37e7 100644 --- a/llvm/lib/CodeGen/MIR2Vec.cpp +++ b/llvm/lib/CodeGen/MIR2Vec.cpp @@ -42,6 +42,13 @@ static cl::opt<std::string> cl::opt<float> OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0), cl::desc("Weight for machine opcode embeddings"), cl::cat(MIR2VecCategory)); +cl::opt<float> CommonOperandWeight( + "mir2vec-common-operand-weight", cl::Optional, cl::init(1.0), + cl::desc("Weight for common operand embeddings"), cl::cat(MIR2VecCategory)); +cl::opt<float> + RegOperandWeight("mir2vec-reg-operand-weight", cl::Optional, cl::init(1.0), + cl::desc("Weight for register operand embeddings"), + cl::cat(MIR2VecCategory)); cl::opt<MIR2VecKind> MIR2VecEmbeddingKind( "mir2vec-kind", cl::Optional, cl::values(clEnumValN(MIR2VecKind::Symbolic, "symbolic", @@ -56,26 +63,52 @@ cl::opt<MIR2VecKind> MIR2VecEmbeddingKind( // Vocabulary //===----------------------------------------------------------------------===// -MIRVocabulary::MIRVocabulary(VocabMap &&OpcodeEntries, - const TargetInstrInfo &TII) - : TII(TII) { +MIRVocabulary::MIRVocabulary(VocabMap &&OpcodeMap, VocabMap &&CommonOperandMap, + VocabMap &&PhysicalRegisterMap, + VocabMap &&VirtualRegisterMap, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI) + : TII(TII), TRI(TRI), MRI(MRI) { buildCanonicalOpcodeMapping(); - unsigned CanonicalOpcodeCount = UniqueBaseOpcodeNames.size(); assert(CanonicalOpcodeCount > 0 && "No canonical opcodes found for target - invalid vocabulary"); - Layout.OperandBase = CanonicalOpcodeCount; - generateStorage(OpcodeEntries); + + buildRegisterOperandMapping(); + + // Define layout of vocabulary sections + Layout.OpcodeBase = 0; + Layout.CommonOperandBase = CanonicalOpcodeCount; + // We expect same classes for physical and virtual registers + Layout.PhyRegBase = 
Layout.CommonOperandBase + std::size(CommonOperandNames); + Layout.VirtRegBase = Layout.PhyRegBase + RegisterOperandNames.size(); + + generateStorage(OpcodeMap, CommonOperandMap, PhysicalRegisterMap, + VirtualRegisterMap); Layout.TotalEntries = Storage.size(); } -Expected<MIRVocabulary> MIRVocabulary::create(VocabMap &&Entries, - const TargetInstrInfo &TII) { - if (Entries.empty()) +Expected<MIRVocabulary> +MIRVocabulary::create(VocabMap &&OpcodeMap, VocabMap &&CommonOperandMap, + VocabMap &&PhyRegMap, VocabMap &&VirtRegMap, + const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI) { + if (OpcodeMap.empty() || CommonOperandMap.empty() || PhyRegMap.empty() || + VirtRegMap.empty()) return createStringError(errc::invalid_argument, "Empty vocabulary entries provided"); - return MIRVocabulary(std::move(Entries), TII); + MIRVocabulary Vocab(std::move(OpcodeMap), std::move(CommonOperandMap), + std::move(PhyRegMap), std::move(VirtRegMap), TII, TRI, + MRI); + + // Validate Storage after construction + if (!Vocab.Storage.isValid()) + return createStringError(errc::invalid_argument, + "Failed to create valid vocabulary storage"); + Vocab.ZeroEmbedding = Embedding(Vocab.Storage.getDimension(), 0.0); + return std::move(Vocab); } std::string MIRVocabulary::extractBaseOpcodeName(StringRef InstrName) { @@ -122,22 +155,74 @@ unsigned MIRVocabulary::getCanonicalOpcodeIndex(unsigned Opcode) const { return getCanonicalIndexForBaseName(BaseOpcode); } +unsigned +MIRVocabulary::getCanonicalIndexForOperandName(StringRef OperandName) const { + auto It = std::find(std::begin(CommonOperandNames), + std::end(CommonOperandNames), OperandName); + assert(It != std::end(CommonOperandNames) && + "Operand name not found in common operands"); + return Layout.CommonOperandBase + + std::distance(std::begin(CommonOperandNames), It); +} + +unsigned +MIRVocabulary::getCanonicalIndexForRegisterClass(StringRef RegName, + bool IsPhysical) const { + auto It = 
std::find(RegisterOperandNames.begin(), RegisterOperandNames.end(), + RegName); + assert(It != RegisterOperandNames.end() && + "Register name not found in register operands"); + unsigned LocalIndex = std::distance(RegisterOperandNames.begin(), It); + return (IsPhysical ? Layout.PhyRegBase : Layout.VirtRegBase) + LocalIndex; +} + std::string MIRVocabulary::getStringKey(unsigned Pos) const { assert(Pos < Layout.TotalEntries && "Position out of bounds in vocabulary"); - // For now, all entries are opcodes since we only have one section - if (Pos < Layout.OperandBase && Pos < UniqueBaseOpcodeNames.size()) { + // Handle opcodes section + if (Pos < Layout.CommonOperandBase) { // Convert canonical index back to base opcode name auto It = UniqueBaseOpcodeNames.begin(); std::advance(It, Pos); + assert(It != UniqueBaseOpcodeNames.end() && + "Canonical index out of bounds in opcode section"); return *It; } - llvm_unreachable("Invalid position in vocabulary"); - return ""; + auto getLocalIndex = [](unsigned Pos, size_t BaseOffset, size_t Bound, + const char *Msg) { + unsigned LocalIndex = Pos - BaseOffset; + assert(LocalIndex < Bound && Msg); + return LocalIndex; + }; + + // Handle common operands section + if (Pos < Layout.PhyRegBase) { + unsigned LocalIndex = getLocalIndex( + Pos, Layout.CommonOperandBase, std::size(CommonOperandNames), + "Local index out of bounds in common operands"); + return CommonOperandNames[LocalIndex].str(); + } + + // Handle physical registers section + if (Pos < Layout.VirtRegBase) { + unsigned LocalIndex = + getLocalIndex(Pos, Layout.PhyRegBase, RegisterOperandNames.size(), + "Local index out of bounds in physical registers"); + return "PhyReg_" + RegisterOperandNames[LocalIndex]; + } + + // Handle virtual registers section + unsigned LocalIndex = + getLocalIndex(Pos, Layout.VirtRegBase, RegisterOperandNames.size(), + "Local index out of bounds in virtual registers"); + return "VirtReg_" + RegisterOperandNames[LocalIndex]; } -void 
MIRVocabulary::generateStorage(const VocabMap &OpcodeMap) { +void MIRVocabulary::generateStorage(const VocabMap &OpcodeMap, + const VocabMap &CommonOperandsMap, + const VocabMap &PhyRegMap, + const VocabMap &VirtRegMap) { // Helper for handling missing entities in the vocabulary. // Currently, we use a zero vector. In the future, we will throw an error to @@ -151,14 +236,14 @@ void MIRVocabulary::generateStorage(const VocabMap &OpcodeMap) { // Initialize opcode embeddings section unsigned EmbeddingDim = OpcodeMap.begin()->second.size(); - std::vector<Embedding> OpcodeEmbeddings(Layout.OperandBase, + std::vector<Embedding> OpcodeEmbeddings(Layout.CommonOperandBase, Embedding(EmbeddingDim)); // Populate opcode embeddings using canonical mapping for (auto COpcodeName : UniqueBaseOpcodeNames) { if (auto It = OpcodeMap.find(COpcodeName); It != OpcodeMap.end()) { auto COpcodeIndex = getCanonicalIndexForBaseName(COpcodeName); - assert(COpcodeIndex < Layout.OperandBase && + assert(COpcodeIndex < Layout.CommonOperandBase && "Canonical index out of bounds"); OpcodeEmbeddings[COpcodeIndex] = It->second; } else { @@ -166,8 +251,39 @@ void MIRVocabulary::generateStorage(const VocabMap &OpcodeMap) { } } - // TODO: Add operand/argument embeddings as additional sections - // This will require extending the vocabulary format and layout + // Initialize common operand embeddings section + std::vector<Embedding> CommonOperandEmbeddings(std::size(CommonOperandNames), + Embedding(EmbeddingDim)); + unsigned OperandIndex = 0; + for (const auto &CommonOperandName : CommonOperandNames) { + if (auto It = CommonOperandsMap.find(CommonOperandName.str()); + It != CommonOperandsMap.end()) { + CommonOperandEmbeddings[OperandIndex] = It->second; + } else { + handleMissingEntity(CommonOperandName); + } + ++OperandIndex; + } + + // Helper lambda for creating register operand embeddings + auto createRegisterEmbeddings = [&](const VocabMap &RegMap) { + std::vector<Embedding> 
RegEmbeddings(TRI.getNumRegClasses(), + Embedding(EmbeddingDim)); + unsigned RegOperandIndex = 0; + for (const auto &RegOperandName : RegisterOperandNames) { + if (auto It = RegMap.find(RegOperandName); It != RegMap.end()) + RegEmbeddings[RegOperandIndex] = It->second; + else + handleMissingEntity(RegOperandName); + ++RegOperandIndex; + } + return RegEmbeddings; + }; + + // Initialize register operand embeddings sections + std::vector<Embedding> PhyRegEmbeddings = createRegisterEmbeddings(PhyRegMap); + std::vector<Embedding> VirtRegEmbeddings = + createRegisterEmbeddings(VirtRegMap); // Scale the vocabulary sections based on the provided weights auto scaleVocabSection = [](std::vector<Embedding> &Embeddings, @@ -176,9 +292,20 @@ void MIRVocabulary::generateStorage(const VocabMap &OpcodeMap) { Embedding *= Weight; }; scaleVocabSection(OpcodeEmbeddings, OpcWeight); - - std::vector<std::vector<Embedding>> Sections(1); - Sections[0] = std::move(OpcodeEmbeddings); + scaleVocabSection(CommonOperandEmbeddings, CommonOperandWeight); + scaleVocabSection(PhyRegEmbeddings, RegOperandWeight); + scaleVocabSection(VirtRegEmbeddings, RegOperandWeight); + + std::vector<std::vector<Embedding>> Sections( + static_cast<unsigned>(Section::MaxSections)); + Sections[static_cast<unsigned>(Section::Opcodes)] = + std::move(OpcodeEmbeddings); + Sections[static_cast<unsigned>(Section::CommonOperands)] = + std::move(CommonOperandEmbeddings); + Sections[static_cast<unsigned>(Section::PhyRegisters)] = + std::move(PhyRegEmbeddings); + Sections[static_cast<unsigned>(Section::VirtRegisters)] = + std::move(VirtRegEmbeddings); Storage = ir2vec::VocabStorage(std::move(Sections)); } @@ -199,46 +326,130 @@ void MIRVocabulary::buildCanonicalOpcodeMapping() { << " unique base opcodes\n"); } -Expected<MIRVocabulary> -MIRVocabulary::createDummyVocabForTest(const TargetInstrInfo &TII, - unsigned Dim) { +void MIRVocabulary::buildRegisterOperandMapping() { + // Check if already built + if 
(!RegisterOperandNames.empty()) + return; + + for (unsigned RC = 0; RC < TRI.getNumRegClasses(); ++RC) { + const TargetRegisterClass *RegClass = TRI.getRegClass(RC); + if (!RegClass) + continue; + + // Get the register class name + StringRef ClassName = TRI.getRegClassName(RegClass); + RegisterOperandNames.push_back(ClassName.str()); + } +} + +unsigned MIRVocabulary::getCommonOperandIndex( + MachineOperand::MachineOperandType OperandType) const { + assert(OperandType != MachineOperand::MO_Register && + "Expected non-register operand type"); + assert(OperandType > MachineOperand::MO_Register && + OperandType < MachineOperand::MO_Last && "Operand type out of bounds"); + return static_cast<unsigned>(OperandType) - 1; +} + +unsigned MIRVocabulary::getRegisterOperandIndex(Register Reg) const { + assert(!RegisterOperandNames.empty() && "Register operand mapping not built"); + assert(Reg.isValid() && "Invalid register; not expected here"); + assert((Reg.isPhysical() || Reg.isVirtual()) && + "Expected a physical or virtual register"); + + const TargetRegisterClass *RegClass = nullptr; + + // For physical registers, use TRI to get minimal register class as a + // physical register can belong to multiple classes. For virtual + // registers, use MRI to uniquely identify the assigned register class. 
+ if (Reg.isPhysical()) + RegClass = TRI.getMinimalPhysRegClass(Reg); + else + RegClass = MRI.getRegClass(Reg); + + if (RegClass) + return RegClass->getID(); + // Fallback for registers without a class (shouldn't happen) + llvm_unreachable("Register operand without a valid register class"); + return 0; +} + +Expected<MIRVocabulary> MIRVocabulary::createDummyVocabForTest( + const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, unsigned Dim) { assert(Dim > 0 && "Dimension must be greater than zero"); float DummyVal = 0.1f; - // Create dummy embeddings for all canonical opcode names - VocabMap DummyVocabMap; + VocabMap DummyOpcMap, DummyOperandMap, DummyPhyRegMap, DummyVirtRegMap; + + // Process opcodes directly without creating temporary vocabulary for (unsigned Opcode = 0; Opcode < TII.getNumOpcodes(); ++Opcode) { std::string BaseOpcode = extractBaseOpcodeName(TII.getName(Opcode)); - if (DummyVocabMap.count(BaseOpcode) == 0) { - // Only add if not already present - DummyVocabMap[BaseOpcode] = Embedding(Dim, DummyVal); + if (DummyOpcMap.count(BaseOpcode) == 0) { // Only add if not already present + DummyOpcMap[BaseOpcode] = Embedding(Dim, DummyVal); DummyVal += 0.1f; } } - // Create and return vocabulary with dummy embeddings - return MIRVocabulary::create(std::move(DummyVocabMap), TII); + // Add common operands + for (const auto &CommonOperandName : CommonOperandNames) { + DummyOperandMap[CommonOperandName.str()] = Embedding(Dim, DummyVal); + DummyVal += 0.1f; + } + + // Process register classes directly + for (unsigned RC = 0; RC < TRI.getNumRegClasses(); ++RC) { + const TargetRegisterClass *RegClass = TRI.getRegClass(RC); + if (!RegClass) + continue; + + std::string ClassName = TRI.getRegClassName(RegClass); + DummyPhyRegMap[ClassName] = Embedding(Dim, DummyVal); + DummyVirtRegMap[ClassName] = Embedding(Dim, DummyVal); + DummyVal += 0.1f; + } + + // Create vocabulary directly without temporary instance + return 
MIRVocabulary::create( + std::move(DummyOpcMap), std::move(DummyOperandMap), + std::move(DummyPhyRegMap), std::move(DummyVirtRegMap), TII, TRI, MRI); } //===----------------------------------------------------------------------===// -// MIR2VecVocabLegacyAnalysis Implementation +// MIR2VecVocabProvider and MIR2VecVocabLegacyAnalysis //===----------------------------------------------------------------------===// -char MIR2VecVocabLegacyAnalysis::ID = 0; -INITIALIZE_PASS_BEGIN(MIR2VecVocabLegacyAnalysis, "mir2vec-vocab-analysis", - "MIR2Vec Vocabulary Analysis", false, true) -INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass) -INITIALIZE_PASS_END(MIR2VecVocabLegacyAnalysis, "mir2vec-vocab-analysis", - "MIR2Vec Vocabulary Analysis", false, true) +Expected<mir2vec::MIRVocabulary> +MIR2VecVocabProvider::getVocabulary(const Module &M) { + VocabMap OpcVocab, CommonOperandVocab, PhyRegVocabMap, VirtRegVocabMap; -StringRef MIR2VecVocabLegacyAnalysis::getPassName() const { - return "MIR2Vec Vocabulary Analysis"; + if (Error Err = readVocabulary(OpcVocab, CommonOperandVocab, PhyRegVocabMap, + VirtRegVocabMap)) + return std::move(Err); + + for (const auto &F : M) { + if (F.isDeclaration()) + continue; + + if (auto *MF = MMI.getMachineFunction(F)) { + auto &Subtarget = MF->getSubtarget(); + if (const auto *TII = Subtarget.getInstrInfo()) + if (const auto *TRI = Subtarget.getRegisterInfo()) + return mir2vec::MIRVocabulary::create( + std::move(OpcVocab), std::move(CommonOperandVocab), + std::move(PhyRegVocabMap), std::move(VirtRegVocabMap), *TII, *TRI, + MF->getRegInfo()); + } + } + return createStringError(errc::invalid_argument, + "No machine functions found in module"); } -Error MIR2VecVocabLegacyAnalysis::readVocabulary() { - // TODO: Extend vocabulary format to support multiple sections - // (opcodes, operands, etc.) 
similar to IR2Vec structure +Error MIR2VecVocabProvider::readVocabulary(VocabMap &OpcodeVocab, + VocabMap &CommonOperandVocab, + VocabMap &PhyRegVocabMap, + VocabMap &VirtRegVocabMap) { if (VocabFile.empty()) return createStringError( errc::invalid_argument, @@ -255,39 +466,47 @@ Error MIR2VecVocabLegacyAnalysis::readVocabulary() { if (!ParsedVocabValue) return ParsedVocabValue.takeError(); - unsigned Dim = 0; + unsigned OpcodeDim = 0, CommonOperandDim = 0, PhyRegOperandDim = 0, + VirtRegOperandDim = 0; if (auto Err = ir2vec::VocabStorage::parseVocabSection( - "entities", *ParsedVocabValue, StrVocabMap, Dim)) + "Opcodes", *ParsedVocabValue, OpcodeVocab, OpcodeDim)) return Err; - return Error::success(); -} - -Expected<mir2vec::MIRVocabulary> -MIR2VecVocabLegacyAnalysis::getMIR2VecVocabulary(const Module &M) { - if (StrVocabMap.empty()) { - if (Error Err = readVocabulary()) { - return std::move(Err); - } - } + if (auto Err = ir2vec::VocabStorage::parseVocabSection( + "CommonOperands", *ParsedVocabValue, CommonOperandVocab, + CommonOperandDim)) + return Err; - // Get machine module info to access machine functions and target info - MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); + if (auto Err = ir2vec::VocabStorage::parseVocabSection( + "PhysicalRegisters", *ParsedVocabValue, PhyRegVocabMap, + PhyRegOperandDim)) + return Err; - // Find first available machine function to get target instruction info - for (const auto &F : M) { - if (F.isDeclaration()) - continue; + if (auto Err = ir2vec::VocabStorage::parseVocabSection( + "VirtualRegisters", *ParsedVocabValue, VirtRegVocabMap, + VirtRegOperandDim)) + return Err; - if (auto *MF = MMI.getMachineFunction(F)) { - const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - return mir2vec::MIRVocabulary::create(std::move(StrVocabMap), *TII); - } + // All sections must have the same embedding dimension + if (!(OpcodeDim == CommonOperandDim && CommonOperandDim == PhyRegOperandDim && + 
PhyRegOperandDim == VirtRegOperandDim)) { + return createStringError( + errc::illegal_byte_sequence, + "MIR2Vec vocabulary sections have different dimensions"); } - // No machine functions available - return error - return createStringError(errc::invalid_argument, - "No machine functions found in module"); + return Error::success(); +} + +char MIR2VecVocabLegacyAnalysis::ID = 0; +INITIALIZE_PASS_BEGIN(MIR2VecVocabLegacyAnalysis, "mir2vec-vocab-analysis", + "MIR2Vec Vocabulary Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass) +INITIALIZE_PASS_END(MIR2VecVocabLegacyAnalysis, "mir2vec-vocab-analysis", + "MIR2Vec Vocabulary Analysis", false, true) + +StringRef MIR2VecVocabLegacyAnalysis::getPassName() const { + return "MIR2Vec Vocabulary Analysis"; } //===----------------------------------------------------------------------===// @@ -351,9 +570,14 @@ Embedding SymbolicMIREmbedder::computeEmbeddings(const MachineInstr &MI) const { if (MI.isDebugInstr()) return Embedding(Dimension, 0); - // Todo: Add operand/argument contributions + // Opcode embedding + Embedding InstructionEmbedding = Vocab[MI.getOpcode()]; + + // Add operand contributions + for (const MachineOperand &MO : MI.operands()) + InstructionEmbedding += Vocab[MO]; - return Vocab[MI.getOpcode()]; + return InstructionEmbedding; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 6a464d9..4795d81 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -2788,6 +2788,9 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) { if (expectAndConsume(MIToken::rparen)) return error("shufflemask should be terminated by ')'."); + if (ShufMask.size() < 2) + return error("shufflemask should have > 1 element"); + ArrayRef<int> MaskAlloc = MF.allocateShuffleMask(ShufMask); Dest = 
MachineOperand::CreateShuffleMask(MaskAlloc); return false; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 1cb57a4..ba0b025 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1137,7 +1137,7 @@ public: MF.setDelegate(this); } - ~SlotIndexUpdateDelegate() { + ~SlotIndexUpdateDelegate() override { MF.resetDelegate(this); for (auto MI : Insertions) Indexes->insertMachineInstrInMaps(*MI); diff --git a/llvm/lib/CodeGen/MachineBlockHashInfo.cpp b/llvm/lib/CodeGen/MachineBlockHashInfo.cpp new file mode 100644 index 0000000..c4d9c0f --- /dev/null +++ b/llvm/lib/CodeGen/MachineBlockHashInfo.cpp @@ -0,0 +1,115 @@ +//===- llvm/CodeGen/MachineBlockHashInfo.cpp---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Compute the hashes of basic blocks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineBlockHashInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +uint64_t hashBlock(const MachineBasicBlock &MBB, bool HashOperands) { + uint64_t Hash = 0; + for (const MachineInstr &MI : MBB) { + if (MI.isMetaInstruction() || MI.isTerminator()) + continue; + Hash = hashing::detail::hash_16_bytes(Hash, MI.getOpcode()); + if (HashOperands) { + for (unsigned i = 0; i < MI.getNumOperands(); i++) { + Hash = + hashing::detail::hash_16_bytes(Hash, hash_value(MI.getOperand(i))); + } + } + } + return Hash; +} + +/// Fold a 64-bit integer to a 16-bit one. 
+uint16_t fold_64_to_16(const uint64_t Value) { + uint16_t Res = static_cast<uint16_t>(Value); + Res ^= static_cast<uint16_t>(Value >> 16); + Res ^= static_cast<uint16_t>(Value >> 32); + Res ^= static_cast<uint16_t>(Value >> 48); + return Res; +} + +INITIALIZE_PASS(MachineBlockHashInfo, "machine-block-hash", + "Machine Block Hash Analysis", true, true) + +char MachineBlockHashInfo::ID = 0; + +MachineBlockHashInfo::MachineBlockHashInfo() : MachineFunctionPass(ID) { + initializeMachineBlockHashInfoPass(*PassRegistry::getPassRegistry()); +} + +void MachineBlockHashInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +struct CollectHashInfo { + uint64_t Offset; + uint64_t OpcodeHash; + uint64_t InstrHash; + uint64_t NeighborHash; +}; + +bool MachineBlockHashInfo::runOnMachineFunction(MachineFunction &F) { + DenseMap<const MachineBasicBlock *, CollectHashInfo> HashInfos; + uint16_t Offset = 0; + // Initialize hash components + for (const MachineBasicBlock &MBB : F) { + // offset of the machine basic block + HashInfos[&MBB].Offset = Offset; + Offset += MBB.size(); + // Hashing opcodes + HashInfos[&MBB].OpcodeHash = hashBlock(MBB, /*HashOperands=*/false); + // Hash complete instructions + HashInfos[&MBB].InstrHash = hashBlock(MBB, /*HashOperands=*/true); + } + + // Initialize neighbor hash + for (const MachineBasicBlock &MBB : F) { + uint64_t Hash = HashInfos[&MBB].OpcodeHash; + // Append hashes of successors + for (const MachineBasicBlock *SuccMBB : MBB.successors()) { + uint64_t SuccHash = HashInfos[SuccMBB].OpcodeHash; + Hash = hashing::detail::hash_16_bytes(Hash, SuccHash); + } + // Append hashes of predecessors + for (const MachineBasicBlock *PredMBB : MBB.predecessors()) { + uint64_t PredHash = HashInfos[PredMBB].OpcodeHash; + Hash = hashing::detail::hash_16_bytes(Hash, PredHash); + } + HashInfos[&MBB].NeighborHash = Hash; + } + + // Assign hashes + for (const MachineBasicBlock &MBB : F) { + 
const auto &HashInfo = HashInfos[&MBB]; + BlendedBlockHash BlendedHash(fold_64_to_16(HashInfo.Offset), + fold_64_to_16(HashInfo.OpcodeHash), + fold_64_to_16(HashInfo.InstrHash), + fold_64_to_16(HashInfo.NeighborHash)); + MBBHashInfo[&MBB] = BlendedHash.combine(); + } + + return false; +} + +uint64_t MachineBlockHashInfo::getMBBHash(const MachineBasicBlock &MBB) { + return MBBHashInfo[&MBB]; +} + +MachineFunctionPass *llvm::createMachineBlockHashInfoPass() { + return new MachineBlockHashInfo(); +} diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 1154855..c0710c4 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1924,13 +1924,23 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (Src0Ty != Src1Ty) report("Source operands must be the same type", MI); - if (Src0Ty.getScalarType() != DstTy.getScalarType()) + if (Src0Ty.getScalarType() != DstTy.getScalarType()) { report("G_SHUFFLE_VECTOR cannot change element type", MI); + break; + } + if (!Src0Ty.isVector()) { + report("G_SHUFFLE_VECTOR must have vector src", MI); + break; + } + if (!DstTy.isVector()) { + report("G_SHUFFLE_VECTOR must have vector dst", MI); + break; + } // Don't check that all operands are vector because scalars are used in // place of 1 element vectors. - int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1; - int DstNumElts = DstTy.isVector() ? 
DstTy.getNumElements() : 1; + int SrcNumElts = Src0Ty.getNumElements(); + int DstNumElts = DstTy.getNumElements(); ArrayRef<int> MaskIdxes = MaskOp.getShuffleMask(); diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index e1d39d6..493d8df 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -196,7 +196,7 @@ public: CopyRewriter(MachineInstr &MI) : Rewriter(MI) { assert(MI.isCopy() && "Expected copy instruction"); } - virtual ~CopyRewriter() = default; + ~CopyRewriter() override = default; bool getNextRewritableSource(RegSubRegPair &Src, RegSubRegPair &Dst) override { diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index f54e2f2..620d3d3 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -593,7 +593,7 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const { case Intrinsic::log: Changed |= forEachCall(F, [&](CallInst *CI) { Type *Ty = CI->getArgOperand(0)->getType(); - if (!isa<ScalableVectorType>(Ty)) + if (!TM || !isa<ScalableVectorType>(Ty)) return false; const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering(); unsigned Op = TL->IntrinsicIDToISD(F.getIntrinsicID()); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 72b364c..697b779 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -211,7 +211,7 @@ private: unsigned getSparseSetIndex() const { return VirtReg.virtRegIndex(); } }; - using LiveRegMap = SparseSet<LiveReg, unsigned, identity_cxx20, uint16_t>; + using LiveRegMap = SparseSet<LiveReg, unsigned, identity, uint16_t>; /// This map contains entries for each virtual register that is currently /// available in a physical register. 
LiveRegMap LiveVirtRegs; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d2ea652..8676060 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19993,8 +19993,12 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, // nor a successor of N. Otherwise, if Op is folded that would // create a cycle. unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); - for (SDNode *Op : Ptr->users()) { + for (SDUse &U : Ptr->uses()) { + if (U.getResNo() != Ptr.getResNo()) + continue; + // Check for #1. + SDNode *Op = U.getUser(); if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI)) continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 603dc34..9656a30 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -890,6 +890,7 @@ private: SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 3b5f83f..bb4a8d9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -69,6 +69,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: + R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N)); + break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -475,6 +478,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomicLoad( + N->getExtensionType(), SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 90edaf3..379242e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8620,7 +8620,10 @@ SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset, if (TLI->shouldPreservePtrArith(this->getMachineFunction().getFunction(), BasePtrVT)) return getNode(ISD::PTRADD, DL, BasePtrVT, Ptr, Offset, Flags); - return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, Flags); + // InBounds only applies to PTRADD, don't set it if we generate ADD. + SDNodeFlags AddFlags = Flags; + AddFlags.setInBounds(false); + return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, AddFlags); } /// Returns true if memcpy source is constant data. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index dcf2df3..dee0909 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1162,6 +1162,43 @@ SDValue SelectionDAGBuilder::getMemoryRoot() { return updateRoot(PendingLoads); } +SDValue SelectionDAGBuilder::getFPOperationRoot(fp::ExceptionBehavior EB) { + // If the new exception behavior differs from that of the pending + // ones, chain up them and update the root. + switch (EB) { + case fp::ExceptionBehavior::ebMayTrap: + case fp::ExceptionBehavior::ebIgnore: + // Floating-point exceptions produced by such operations are not intended + // to be observed, so the sequence of these operations does not need to be + // preserved. + // + // They however must not be mixed with the instructions that have strict + // exception behavior. Placing an operation with 'ebIgnore' behavior between + // 'ebStrict' operations could distort the observed exception behavior. + if (!PendingConstrainedFPStrict.empty()) { + assert(PendingConstrainedFP.empty()); + updateRoot(PendingConstrainedFPStrict); + } + break; + case fp::ExceptionBehavior::ebStrict: + // Floating-point exception produced by these operations may be observed, so + // they must be correctly chained. If trapping on FP exceptions is + // disabled, the exceptions can be observed only by functions that read + // exception flags, like 'llvm.get_fpenv' or 'fetestexcept'. It means that + // the order of operations is not significant between barriers. + // + // If trapping is enabled, each operation becomes an implicit observation + // point, so the operations must be sequenced according their original + // source order. + if (!PendingConstrainedFP.empty()) { + assert(PendingConstrainedFPStrict.empty()); + updateRoot(PendingConstrainedFP); + } + // TODO: Add support for trapping-enabled scenarios. 
+ } + return DAG.getRoot(); +} + SDValue SelectionDAGBuilder::getRoot() { // Chain up all pending constrained intrinsics together with all // pending loads, by simply appending them to PendingLoads and @@ -3131,12 +3168,16 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, if (TLI.useLoadStackGuardNode(M)) { Guard = getLoadStackGuard(DAG, dl, Chain); } else { - const Value *IRGuard = TLI.getSDagStackGuard(M); - SDValue GuardPtr = getValue(IRGuard); - - Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr, - MachinePointerInfo(IRGuard, 0), Align, - MachineMemOperand::MOVolatile); + if (const Value *IRGuard = TLI.getSDagStackGuard(M)) { + SDValue GuardPtr = getValue(IRGuard); + Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr, + MachinePointerInfo(IRGuard, 0), Align, + MachineMemOperand::MOVolatile); + } else { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.diagnose(DiagnosticInfoGeneric("unable to lower stackguard")); + Guard = DAG.getPOISON(PtrMemTy); + } } // Perform the comparison via a getsetcc. @@ -4386,6 +4427,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (NW.hasNoUnsignedWrap() || (int64_t(Offset) >= 0 && NW.hasNoUnsignedSignedWrap())) Flags |= SDNodeFlags::NoUnsignedWrap; + Flags.setInBounds(NW.isInBounds()); N = DAG.getMemBasePlusOffset( N, DAG.getConstant(Offset, dl, N.getValueType()), dl, Flags); @@ -4429,6 +4471,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (NW.hasNoUnsignedWrap() || (Offs.isNonNegative() && NW.hasNoUnsignedSignedWrap())) Flags.setNoUnsignedWrap(true); + Flags.setInBounds(NW.isInBounds()); OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType()); @@ -4498,6 +4541,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // pointer index type (add nuw). 
SDNodeFlags AddFlags; AddFlags.setNoUnsignedWrap(NW.hasNoUnsignedWrap()); + AddFlags.setInBounds(NW.isInBounds()); N = DAG.getMemBasePlusOffset(N, IdxN, dl, AddFlags); } @@ -7324,6 +7368,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Res = DAG.getPtrExtOrTrunc(Res, sdl, PtrTy); } else { const Value *Global = TLI.getSDagStackGuard(M); + if (!Global) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.diagnose(DiagnosticInfoGeneric("unable to lower stackguard")); + setValue(&I, DAG.getPOISON(PtrTy)); + return; + } + Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType()); Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), MachinePointerInfo(Global, 0), Align, @@ -8284,6 +8335,30 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } } +void SelectionDAGBuilder::pushFPOpOutChain(SDValue Result, + fp::ExceptionBehavior EB) { + assert(Result.getNode()->getNumValues() == 2); + SDValue OutChain = Result.getValue(1); + assert(OutChain.getValueType() == MVT::Other); + + // Instead of updating the root immediately, push the produced chain to the + // appropriate list, deferring the update until the root is requested. In this + // case, the nodes from the lists are chained using TokenFactor, indicating + // that the operations are independent. + // + // In particular, the root is updated before any call that might access the + // floating-point environment, except for constrained intrinsics. 
+ switch (EB) { + case fp::ExceptionBehavior::ebMayTrap: + case fp::ExceptionBehavior::ebIgnore: + PendingConstrainedFP.push_back(OutChain); + break; + case fp::ExceptionBehavior::ebStrict: + PendingConstrainedFPStrict.push_back(OutChain); + break; + } +} + void SelectionDAGBuilder::visitConstrainedFPIntrinsic( const ConstrainedFPIntrinsic &FPI) { SDLoc sdl = getCurSDLoc(); @@ -8291,42 +8366,16 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( // We do not need to serialize constrained FP intrinsics against // each other or against (nonvolatile) loads, so they can be // chained like loads. - SDValue Chain = DAG.getRoot(); + fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); + SDValue Chain = getFPOperationRoot(EB); SmallVector<SDValue, 4> Opers; Opers.push_back(Chain); for (unsigned I = 0, E = FPI.getNonMetadataArgCount(); I != E; ++I) Opers.push_back(getValue(FPI.getArgOperand(I))); - auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) { - assert(Result.getNode()->getNumValues() == 2); - - // Push node to the appropriate list so that future instructions can be - // chained up correctly. - SDValue OutChain = Result.getValue(1); - switch (EB) { - case fp::ExceptionBehavior::ebIgnore: - // The only reason why ebIgnore nodes still need to be chained is that - // they might depend on the current rounding mode, and therefore must - // not be moved across instruction that may change that mode. - [[fallthrough]]; - case fp::ExceptionBehavior::ebMayTrap: - // These must not be moved across calls or instructions that may change - // floating-point exception masks. - PendingConstrainedFP.push_back(OutChain); - break; - case fp::ExceptionBehavior::ebStrict: - // These must not be moved across calls or instructions that may change - // floating-point exception masks or read floating-point exception flags. - // In addition, they cannot be optimized out even if unused. 
- PendingConstrainedFPStrict.push_back(OutChain); - break; - } - }; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), FPI.getType()); SDVTList VTs = DAG.getVTList(VT, MVT::Other); - fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); SDNodeFlags Flags; if (EB == fp::ExceptionBehavior::ebIgnore) @@ -8350,7 +8399,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) { Opers.pop_back(); SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags); - pushOutChain(Mul, EB); + pushFPOpOutChain(Mul, EB); Opcode = ISD::STRICT_FADD; Opers.clear(); Opers.push_back(Mul.getValue(1)); @@ -8381,7 +8430,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( } SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags); - pushOutChain(Result, EB); + pushFPOpOutChain(Result, EB); SDValue FPResult = Result.getValue(0); setValue(&FPI, FPResult); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index c7577fa..47e19f7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -195,6 +195,11 @@ private: /// Update root to include all chains from the Pending list. SDValue updateRoot(SmallVectorImpl<SDValue> &Pending); + /// Given a node representing a floating-point operation and its specified + /// exception behavior, this either updates the root or stores the node in + /// a list to be added to chains latter. + void pushFPOpOutChain(SDValue Result, fp::ExceptionBehavior EB); + /// A unique monotonically increasing number used to order the SDNodes we /// create. unsigned SDNodeOrder; @@ -300,6 +305,13 @@ public: /// memory node that may need to be ordered after any prior load instructions. 
SDValue getMemoryRoot(); + /// Return the current virtual root of the Selection DAG, flushing + /// PendingConstrainedFP or PendingConstrainedFPStrict items if the new + /// exception behavior (specified by \p EB) differs from that of the pending + /// instructions. This must be done before emitting constrained FP operation + /// call. + SDValue getFPOperationRoot(fp::ExceptionBehavior EB); + /// Similar to getMemoryRoot, but also flushes PendingConstrainedFP(Strict) /// items. This must be done before emitting any call other any other node /// that may need to be ordered after FP instructions due to other side diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 39cbfad..77377d3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -689,6 +689,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasSameSign()) OS << " samesign"; + if (getFlags().hasInBounds()) + OS << " inbounds"; + if (getFlags().hasNonNeg()) OS << " nneg"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 060b1dd..59798b3 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -2097,6 +2097,11 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { } Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { + // MSVC CRT has a function to validate security cookie. 
+ RTLIB::LibcallImpl SecurityCheckCookieLibcall = + getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE); + if (SecurityCheckCookieLibcall != RTLIB::Unsupported) + return M.getFunction(getLibcallImplName(SecurityCheckCookieLibcall)); return nullptr; } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index b6169e6..10b7238 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -272,6 +272,12 @@ static cl::opt<bool> cl::desc("Split static data sections into hot and cold " "sections using profile information")); +cl::opt<bool> EmitBBHash( + "emit-bb-hash", + cl::desc( + "Emit the hash of basic block in the SHT_LLVM_BB_ADDR_MAP section."), + cl::init(false), cl::Optional); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. @@ -1281,6 +1287,8 @@ void TargetPassConfig::addMachinePasses() { // address map (or both). 
if (TM->getBBSectionsType() != llvm::BasicBlockSection::None || TM->Options.BBAddrMap) { + if (EmitBBHash) + addPass(llvm::createMachineBlockHashInfoPass()); if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( TM->getBBSectionsFuncListBuf())); diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index 6c7e27e..fa04976 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -247,7 +247,7 @@ public: StandardSegments(std::move(StandardSegments)), FinalizationSegments(std::move(FinalizationSegments)) {} - ~IPInFlightAlloc() { + ~IPInFlightAlloc() override { assert(!G && "InFlight alloc neither abandoned nor finalized"); } diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 8d413a3..d029ac5 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -2901,13 +2901,23 @@ ExecutionSession::IL_emit(MaterializationResponsibility &MR, for (auto &SN : ER.Ready) IL_collectQueries( - EQ.Updated, SN->defs(), + EQ.Completed, SN->defs(), [](JITDylib::SymbolTableEntry &E) { E.setState(SymbolState::Ready); }, [](AsynchronousSymbolQuery &Q, JITDylib &JD, NonOwningSymbolStringPtr Name, JITDylib::SymbolTableEntry &E) { Q.notifySymbolMetRequiredState(SymbolStringPtr(Name), E.getSymbol()); }); + // std::erase_if is not available in C++17, and llvm::erase_if does not work + // here. 
+ for (auto it = EQ.Completed.begin(), end = EQ.Completed.end(); it != end;) { + if ((*it)->isComplete()) { + ++it; + } else { + it = EQ.Completed.erase(it); + } + } + #ifdef EXPENSIVE_CHECKS verifySessionState("exiting ExecutionSession::IL_emit"); #endif @@ -3043,9 +3053,8 @@ Error ExecutionSession::OL_notifyEmitted( } } - for (auto &UQ : EmitQueries->Updated) - if (UQ->isComplete()) - UQ->handleComplete(*this); + for (auto &UQ : EmitQueries->Completed) + UQ->handleComplete(*this); // If there are any bad dependencies then return an error. if (!BadDeps.empty()) { diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp index 75ae80f..4ceff48 100644 --- a/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp @@ -38,7 +38,7 @@ public: MachODebugObjectSynthesizerBase(LinkGraph &G, ExecutorAddr RegisterActionAddr) : G(G), RegisterActionAddr(RegisterActionAddr) {} - virtual ~MachODebugObjectSynthesizerBase() = default; + ~MachODebugObjectSynthesizerBase() override = default; Error preserveDebugSections() { if (G.findSectionByName(SynthDebugSectionName)) { diff --git a/llvm/lib/ExecutionEngine/Orc/LinkGraphLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/LinkGraphLinkingLayer.cpp index d1a6eaf..a2990ab 100644 --- a/llvm/lib/ExecutionEngine/Orc/LinkGraphLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LinkGraphLinkingLayer.cpp @@ -55,7 +55,7 @@ public: Plugins = Layer.Plugins; } - ~JITLinkCtx() { + ~JITLinkCtx() override { // If there is an object buffer return function then use it to // return ownership of the buffer. 
if (Layer.ReturnObjectBuffer && ObjBuffer) diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp index fd805fbf..cdde733 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp @@ -92,7 +92,7 @@ public: Name(std::move(Name)), Ctx(Ctx), Materialize(Materialize), Discard(Discard), Destroy(Destroy) {} - ~OrcCAPIMaterializationUnit() { + ~OrcCAPIMaterializationUnit() override { if (Ctx) Destroy(Ctx); } @@ -264,7 +264,7 @@ public: LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction TryToGenerate) : Dispose(Dispose), Ctx(Ctx), TryToGenerate(TryToGenerate) {} - ~CAPIDefinitionGenerator() { + ~CAPIDefinitionGenerator() override { if (Dispose) Dispose(Ctx); } diff --git a/llvm/lib/Frontend/HLSL/CBuffer.cpp b/llvm/lib/Frontend/HLSL/CBuffer.cpp index 407b6ad..1f53c87 100644 --- a/llvm/lib/Frontend/HLSL/CBuffer.cpp +++ b/llvm/lib/Frontend/HLSL/CBuffer.cpp @@ -43,8 +43,13 @@ std::optional<CBufferMetadata> CBufferMetadata::get(Module &M) { for (const MDNode *MD : CBufMD->operands()) { assert(MD->getNumOperands() && "Invalid cbuffer metadata"); - auto *Handle = cast<GlobalVariable>( - cast<ValueAsMetadata>(MD->getOperand(0))->getValue()); + // For an unused cbuffer, the handle may have been optimized out + Metadata *OpMD = MD->getOperand(0); + if (!OpMD) + continue; + + auto *Handle = + cast<GlobalVariable>(cast<ValueAsMetadata>(OpMD)->getValue()); CBufferMapping &Mapping = Result->Mappings.emplace_back(Handle); for (int I = 1, E = MD->getNumOperands(); I < E; ++I) { diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 488b078..1096e57 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -4082,10 +4082,10 @@ void AssemblyWriter::printTypeIdentities() { /// printFunction - Print all aspects of a function. 
void AssemblyWriter::printFunction(const Function *F) { - if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out); - if (F->isMaterializable()) Out << "; Materializable\n"; + else if (AnnotationWriter) + AnnotationWriter->emitFunctionAnnot(F, Out); const AttributeList &Attrs = F->getAttributes(); if (Attrs.hasFnAttrs()) { diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index 30b5e48..e19336e 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -403,9 +403,14 @@ void Module::setModuleFlag(ModFlagBehavior Behavior, StringRef Key, Metadata *Val) { NamedMDNode *ModFlags = getOrInsertModuleFlagsMetadata(); // Replace the flag if it already exists. - for (MDNode *Flag : ModFlags->operands()) { + for (unsigned i = 0; i < ModFlags->getNumOperands(); ++i) { + MDNode *Flag = ModFlags->getOperand(i); if (cast<MDString>(Flag->getOperand(1))->getString() == Key) { - Flag->replaceOperandWith(2, Val); + Type *Int32Ty = Type::getInt32Ty(Context); + Metadata *Ops[3] = { + ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Behavior)), + MDString::get(Context, Key), Val}; + ModFlags->setOperand(i, MDNode::get(Context, Ops)); return; } } diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp index a6353664..62fd62c 100644 --- a/llvm/lib/IR/ModuleSummaryIndex.cpp +++ b/llvm/lib/IR/ModuleSummaryIndex.cpp @@ -111,11 +111,13 @@ uint64_t ModuleSummaryIndex::getFlags() const { Flags |= 0x100; if (hasUnifiedLTO()) Flags |= 0x200; + if (withInternalizeAndPromote()) + Flags |= 0x400; return Flags; } void ModuleSummaryIndex::setFlags(uint64_t Flags) { - assert(Flags <= 0x2ff && "Unexpected bits in flag"); + assert(Flags <= 0x7ff && "Unexpected bits in flag"); // 1 bit: WithGlobalValueDeadStripping flag. // Set on combined index only. if (Flags & 0x1) @@ -154,6 +156,10 @@ void ModuleSummaryIndex::setFlags(uint64_t Flags) { // Set on combined index only. 
if (Flags & 0x200) setUnifiedLTO(); + // 1 bit: WithInternalizeAndPromote flag. + // Set on combined index only. + if (Flags & 0x400) + setWithInternalizeAndPromote(); } // Collect for the given module the list of function it defines diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 03da154..7917712 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4446,10 +4446,12 @@ void Verifier::visitLoadInst(LoadInst &LI) { Check(LI.getOrdering() != AtomicOrdering::Release && LI.getOrdering() != AtomicOrdering::AcquireRelease, "Load cannot have Release ordering", &LI); - Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(), - "atomic load operand must have integer, pointer, or floating point " - "type!", + Check(ElTy->getScalarType()->isIntOrPtrTy() || + ElTy->getScalarType()->isFloatingPointTy(), + "atomic load operand must have integer, pointer, floating point, " + "or vector type!", ElTy, &LI); + checkAtomicMemAccessSize(ElTy, &LI); } else { Check(LI.getSyncScopeID() == SyncScope::System, @@ -4472,9 +4474,10 @@ void Verifier::visitStoreInst(StoreInst &SI) { Check(SI.getOrdering() != AtomicOrdering::Acquire && SI.getOrdering() != AtomicOrdering::AcquireRelease, "Store cannot have Acquire ordering", &SI); - Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(), - "atomic store operand must have integer, pointer, or floating point " - "type!", + Check(ElTy->getScalarType()->isIntOrPtrTy() || + ElTy->getScalarType()->isFloatingPointTy(), + "atomic store operand must have integer, pointer, floating point, " + "or vector type!", ElTy, &SI); checkAtomicMemAccessSize(ElTy, &SI); } else { diff --git a/llvm/lib/IRReader/IRReader.cpp b/llvm/lib/IRReader/IRReader.cpp index a7e7dee..c16871f 100644 --- a/llvm/lib/IRReader/IRReader.cpp +++ b/llvm/lib/IRReader/IRReader.cpp @@ -8,6 +8,7 @@ #include "llvm/IRReader/IRReader.h" #include "llvm-c/IRReader.h" +#include "llvm/AsmParser/AsmParserContext.h" #include "llvm/AsmParser/Parser.h" #include 
"llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/LLVMContext.h" @@ -68,7 +69,8 @@ std::unique_ptr<Module> llvm::getLazyIRFileModule(StringRef Filename, std::unique_ptr<Module> llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err, LLVMContext &Context, - ParserCallbacks Callbacks) { + ParserCallbacks Callbacks, + llvm::AsmParserContext *ParserContext) { NamedRegionTimer T(TimeIRParsingName, TimeIRParsingDescription, TimeIRParsingGroupName, TimeIRParsingGroupDescription, TimePassesIsEnabled); @@ -88,12 +90,14 @@ std::unique_ptr<Module> llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err, return parseAssembly(Buffer, Err, Context, nullptr, Callbacks.DataLayout.value_or( - [](StringRef, StringRef) { return std::nullopt; })); + [](StringRef, StringRef) { return std::nullopt; }), + ParserContext); } std::unique_ptr<Module> llvm::parseIRFile(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context, - ParserCallbacks Callbacks) { + ParserCallbacks Callbacks, + AsmParserContext *ParserContext) { ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true); if (std::error_code EC = FileOrErr.getError()) { @@ -102,7 +106,8 @@ std::unique_ptr<Module> llvm::parseIRFile(StringRef Filename, SMDiagnostic &Err, return nullptr; } - return parseIR(FileOrErr.get()->getMemBufferRef(), Err, Context, Callbacks); + return parseIR(FileOrErr.get()->getMemBufferRef(), Err, Context, Callbacks, + ParserContext); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index cbc0b1d..4bc2a18 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -471,11 +471,13 @@ static void thinLTOInternalizeAndPromoteGUID( ValueInfo VI, function_ref<bool(StringRef, ValueInfo)> isExported, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> isPrevailing) { - auto ExternallyVisibleCopies = - llvm::count_if(VI.getSummaryList(), - [](const 
std::unique_ptr<GlobalValueSummary> &Summary) { - return !GlobalValue::isLocalLinkage(Summary->linkage()); - }); + // Before performing index-based internalization and promotion for this GUID, + // the local flag should be consistent with the summary list linkage types. + VI.verifyLocal(); + + const bool SingleExternallyVisibleCopy = + VI.getSummaryList().size() == 1 && + !GlobalValue::isLocalLinkage(VI.getSummaryList().front()->linkage()); for (auto &S : VI.getSummaryList()) { // First see if we need to promote an internal value because it is not @@ -539,7 +541,9 @@ static void thinLTOInternalizeAndPromoteGUID( GlobalValue::isExternalWeakLinkage(S->linkage())) continue; - if (isPrevailing(VI.getGUID(), S.get()) && ExternallyVisibleCopies == 1) + // We may have a single summary copy that is externally visible but not + // prevailing if the prevailing copy is in a native object. + if (SingleExternallyVisibleCopy && isPrevailing(VI.getGUID(), S.get())) S->setLinkage(GlobalValue::InternalLinkage); } } @@ -551,9 +555,11 @@ void llvm::thinLTOInternalizeAndPromoteInIndex( function_ref<bool(StringRef, ValueInfo)> isExported, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> isPrevailing) { + assert(!Index.withInternalizeAndPromote()); for (auto &I : Index) thinLTOInternalizeAndPromoteGUID(Index.getValueInfo(I), isExported, isPrevailing); + Index.setWithInternalizeAndPromote(); } // Requires a destructor for std::vector<InputModule>. 
@@ -1080,15 +1086,15 @@ LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, GlobalValue::getGlobalIdentifier(Sym.getIRName(), GlobalValue::ExternalLinkage, "")); if (R.Prevailing) - ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier(); + ThinLTO.setPrevailingModuleForGUID(GUID, BM.getModuleIdentifier()); } } if (Error Err = BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(), [&](GlobalValue::GUID GUID) { - return ThinLTO.PrevailingModuleForGUID[GUID] == - BM.getModuleIdentifier(); + return ThinLTO.isPrevailingModuleForGUID( + GUID, BM.getModuleIdentifier()); })) return Err; LLVM_DEBUG(dbgs() << "Module " << BM.getModuleIdentifier() << "\n"); @@ -1102,8 +1108,8 @@ LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, GlobalValue::getGlobalIdentifier(Sym.getIRName(), GlobalValue::ExternalLinkage, "")); if (R.Prevailing) { - assert(ThinLTO.PrevailingModuleForGUID[GUID] == - BM.getModuleIdentifier()); + assert( + ThinLTO.isPrevailingModuleForGUID(GUID, BM.getModuleIdentifier())); // For linker redefined symbols (via --wrap or --defsym) we want to // switch the linkage to `weak` to prevent IPOs from happening. 
@@ -1982,7 +1988,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, LocalWPDTargetsMap); auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { - return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); + return ThinLTO.isPrevailingModuleForGUID(GUID, S->modulePath()); }; if (EnableMemProfContextDisambiguation) { MemProfContextDisambiguation ContextDisambiguation; @@ -2218,6 +2224,7 @@ class OutOfProcessThinBackend : public CGThinBackend { ArrayRef<StringRef> DistributorArgs; SString RemoteCompiler; + ArrayRef<StringRef> RemoteCompilerPrependArgs; ArrayRef<StringRef> RemoteCompilerArgs; bool SaveTemps; @@ -2254,12 +2261,14 @@ public: bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles, StringRef LinkerOutputFile, StringRef Distributor, ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler, + ArrayRef<StringRef> RemoteCompilerPrependArgs, ArrayRef<StringRef> RemoteCompilerArgs, bool SaveTemps) : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries, AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles, ThinLTOParallelism), LinkerOutputFile(LinkerOutputFile), DistributorPath(Distributor), DistributorArgs(DistributorArgs), RemoteCompiler(RemoteCompiler), + RemoteCompilerPrependArgs(RemoteCompilerPrependArgs), RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps) {} virtual void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset, @@ -2381,6 +2390,11 @@ public: JOS.attributeArray("args", [&]() { JOS.value(RemoteCompiler); + // Forward any supplied prepend options. 
+ if (!RemoteCompilerPrependArgs.empty()) + for (auto &A : RemoteCompilerPrependArgs) + JOS.value(A); + JOS.value("-c"); JOS.value(Saver.save("--target=" + Triple.str())); @@ -2511,6 +2525,7 @@ ThinBackend lto::createOutOfProcessThinBackend( bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles, StringRef LinkerOutputFile, StringRef Distributor, ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler, + ArrayRef<StringRef> RemoteCompilerPrependArgs, ArrayRef<StringRef> RemoteCompilerArgs, bool SaveTemps) { auto Func = [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, @@ -2520,7 +2535,7 @@ ThinBackend lto::createOutOfProcessThinBackend( Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles, LinkerOutputFile, Distributor, DistributorArgs, RemoteCompiler, - RemoteCompilerArgs, SaveTemps); + RemoteCompilerPrependArgs, RemoteCompilerArgs, SaveTemps); }; return ThinBackend(Func, Parallelism); } diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt index 1e1d0a6..70c4577 100644 --- a/llvm/lib/MC/CMakeLists.txt +++ b/llvm/lib/MC/CMakeLists.txt @@ -73,9 +73,10 @@ add_llvm_component_library(LLVMMC ${LLVM_MAIN_INCLUDE_DIR}/llvm/MC LINK_COMPONENTS + BinaryFormat + DebugInfoDWARFLowLevel Support TargetParser - BinaryFormat DEPENDS intrinsics_gen diff --git a/llvm/lib/MC/MCAsmInfoELF.cpp b/llvm/lib/MC/MCAsmInfoELF.cpp index 98090d3..6670971 100644 --- a/llvm/lib/MC/MCAsmInfoELF.cpp +++ b/llvm/lib/MC/MCAsmInfoELF.cpp @@ -197,6 +197,8 @@ void MCAsmInfoELF::printSwitchToSection(const MCSection &Section, OS << "llvm_jt_sizes"; else if (Sec.Type == ELF::SHT_LLVM_CFI_JUMP_TABLE) OS << "llvm_cfi_jump_table"; + else if (Sec.Type == ELF::SHT_LLVM_CALL_GRAPH) + OS << "llvm_call_graph"; else OS << "0x" << Twine::utohexstr(Sec.Type); diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index aee3c3b..b2f5000 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ 
b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -554,7 +554,7 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) { Ctx->getELFSection(".sframe", ELF::SHT_GNU_SFRAME, ELF::SHF_ALLOC); CallGraphSection = - Ctx->getELFSection(".llvm.callgraph", ELF::SHT_PROGBITS, 0); + Ctx->getELFSection(".llvm.callgraph", ELF::SHT_LLVM_CALL_GRAPH, 0); StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0); @@ -1172,7 +1172,7 @@ MCObjectFileInfo::getCallGraphSection(const MCSection &TextSec) const { } return Ctx->getELFSection( - ".llvm.callgraph", ELF::SHT_PROGBITS, Flags, 0, GroupName, + ".llvm.callgraph", ELF::SHT_LLVM_CALL_GRAPH, Flags, 0, GroupName, /*IsComdat=*/true, ElfSec.getUniqueID(), static_cast<const MCSymbolELF *>(TextSec.getBeginSymbol())); } diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 6195355..1a3752f 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -637,6 +637,8 @@ EndStmt: Type = ELF::SHT_LLVM_JT_SIZES; else if (TypeName == "llvm_cfi_jump_table") Type = ELF::SHT_LLVM_CFI_JUMP_TABLE; + else if (TypeName == "llvm_call_graph") + Type = ELF::SHT_LLVM_CALL_GRAPH; else if (TypeName.getAsInteger(0, Type)) return TokError("unknown section type"); } diff --git a/llvm/lib/MC/MCSFrame.cpp b/llvm/lib/MC/MCSFrame.cpp index d6fa54c..e0a90df 100644 --- a/llvm/lib/MC/MCSFrame.cpp +++ b/llvm/lib/MC/MCSFrame.cpp @@ -8,6 +8,8 @@ #include "llvm/MC/MCSFrame.h" #include "llvm/BinaryFormat/SFrame.h" +#include "llvm/DebugInfo/DWARF/LowLevel/DWARFCFIProgram.h" +#include "llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectFileInfo.h" @@ -211,8 +213,152 @@ class SFrameEmitterImpl { return true; } + // Technically, the escape data could be anything, but it is commonly a dwarf + // CFI program. 
Even then, it could contain an arbitrarily complicated Dwarf + // expression. Following gnu-gas, look for certain common cases that could + // invalidate an FDE, emit a warning for those sequences, and don't generate + // an FDE in those cases. Allow any that are known safe. It is likely that + // more thorough test cases could refine this code, but it handles the most + // important ones compatibly with gas. + // Returns true if the CFI escape sequence is safe for sframes. + bool isCFIEscapeSafe(SFrameFDE &FDE, const SFrameFRE &FRE, + const MCCFIInstruction &CFI) { + const MCAsmInfo *AI = Streamer.getContext().getAsmInfo(); + DWARFDataExtractorSimple data(CFI.getValues(), AI->isLittleEndian(), + AI->getCodePointerSize()); + + // Normally, both alignment factors are extracted from the enclosing Dwarf + // FDE or CIE. We don't have one here. Alignments are used for scaling + // factors for ops like CFA_def_cfa_offset_sf. But this particular function + // is only interested in registers. + dwarf::CFIProgram P(/*CodeAlignmentFactor=*/1, + /*DataAlignmentFactor=*/1, + Streamer.getContext().getTargetTriple().getArch()); + uint64_t Offset = 0; + if (P.parse(data, &Offset, CFI.getValues().size())) { + // Not a parsable dwarf expression. Assume the worst. + Streamer.getContext().reportWarning( + CFI.getLoc(), + "skipping SFrame FDE; .cfi_escape with unknown effects"); + return false; + } + + // This loop deals with dwarf::CFIProgram::Instructions. Everywhere else + // this file deals with MCCFIInstructions. + for (const dwarf::CFIProgram::Instruction &I : P) { + switch (I.Opcode) { + case dwarf::DW_CFA_nop: + break; + case dwarf::DW_CFA_val_offset: { + // First argument is a register. Anything that touches CFA, FP, or RA is + // a problem, but allow others through. As an even more special case, + // allow SP + 0. + auto Reg = I.getOperandAsUnsigned(P, 0); + // The parser should have failed in this case. 
+ assert(Reg && "DW_CFA_val_offset with no register."); + bool SPOk = true; + if (*Reg == SPReg) { + auto Opnd = I.getOperandAsSigned(P, 1); + if (!Opnd || *Opnd != 0) + SPOk = false; + } + if (!SPOk || *Reg == RAReg || *Reg == FPReg) { + StringRef RN = *Reg == SPReg + ? "SP reg " + : (*Reg == FPReg ? "FP reg " : "RA reg "); + Streamer.getContext().reportWarning( + CFI.getLoc(), + Twine( + "skipping SFrame FDE; .cfi_escape DW_CFA_val_offset with ") + + RN + Twine(*Reg)); + return false; + } + } break; + case dwarf::DW_CFA_expression: { + // First argument is a register. Anything that touches CFA, FP, or RA is + // a problem, but allow others through. + auto Reg = I.getOperandAsUnsigned(P, 0); + if (!Reg) { + Streamer.getContext().reportWarning( + CFI.getLoc(), + "skipping SFrame FDE; .cfi_escape with unknown effects"); + return false; + } + if (*Reg == SPReg || *Reg == RAReg || *Reg == FPReg) { + StringRef RN = *Reg == SPReg + ? "SP reg " + : (*Reg == FPReg ? "FP reg " : "RA reg "); + Streamer.getContext().reportWarning( + CFI.getLoc(), + Twine( + "skipping SFrame FDE; .cfi_escape DW_CFA_expression with ") + + RN + Twine(*Reg)); + return false; + } + } break; + case dwarf::DW_CFA_GNU_args_size: { + auto Size = I.getOperandAsSigned(P, 0); + // Zero size doesn't affect the cfa. + if (Size && *Size == 0) + break; + if (FRE.Info.getBaseRegister() != BaseReg::FP) { + Streamer.getContext().reportWarning( + CFI.getLoc(), + Twine("skipping SFrame FDE; .cfi_escape DW_CFA_GNU_args_size " + "with non frame-pointer CFA")); + return false; + } + } break; + // Cases that gas doesn't specially handle. TODO: Some of these could be + // analyzed and handled instead of just punting. But these are uncommon, + // or should be written as normal cfi directives. Some will need fixes to + // the scaling factor. 
+ case dwarf::DW_CFA_advance_loc: + case dwarf::DW_CFA_offset: + case dwarf::DW_CFA_restore: + case dwarf::DW_CFA_set_loc: + case dwarf::DW_CFA_advance_loc1: + case dwarf::DW_CFA_advance_loc2: + case dwarf::DW_CFA_advance_loc4: + case dwarf::DW_CFA_offset_extended: + case dwarf::DW_CFA_restore_extended: + case dwarf::DW_CFA_undefined: + case dwarf::DW_CFA_same_value: + case dwarf::DW_CFA_register: + case dwarf::DW_CFA_remember_state: + case dwarf::DW_CFA_restore_state: + case dwarf::DW_CFA_def_cfa: + case dwarf::DW_CFA_def_cfa_register: + case dwarf::DW_CFA_def_cfa_offset: + case dwarf::DW_CFA_def_cfa_expression: + case dwarf::DW_CFA_offset_extended_sf: + case dwarf::DW_CFA_def_cfa_sf: + case dwarf::DW_CFA_def_cfa_offset_sf: + case dwarf::DW_CFA_val_offset_sf: + case dwarf::DW_CFA_val_expression: + case dwarf::DW_CFA_MIPS_advance_loc8: + case dwarf::DW_CFA_AARCH64_negate_ra_state_with_pc: + case dwarf::DW_CFA_AARCH64_negate_ra_state: + case dwarf::DW_CFA_LLVM_def_aspace_cfa: + case dwarf::DW_CFA_LLVM_def_aspace_cfa_sf: + Streamer.getContext().reportWarning( + CFI.getLoc(), "skipping SFrame FDE; .cfi_escape " + "CFA expression with unknown side effects"); + return false; + default: + // Dwarf expression was only partially valid, and user could have + // written anything. + Streamer.getContext().reportWarning( + CFI.getLoc(), + "skipping SFrame FDE; .cfi_escape with unknown effects"); + return false; + } + } + return true; + } + // Add the effects of CFI to the current FDE, creating a new FRE when - // necessary. + // necessary. Return true if the CFI is representable in the sframe format. bool handleCFI(SFrameFDE &FDE, SFrameFRE &FRE, const MCCFIInstruction &CFI) { switch (CFI.getOperation()) { case MCCFIInstruction::OpDefCfaRegister: @@ -265,10 +411,11 @@ class SFrameEmitterImpl { FRE = FDE.SaveState.pop_back_val(); return true; case MCCFIInstruction::OpEscape: - // TODO: Implement. Will use FDE. 
- return true; + // This is a string of bytes that contains an arbitrary dwarf-expression + // that may or may not affect unwind info. + return isCFIEscapeSafe(FDE, FRE, CFI); default: - // Instructions that don't affect the CFA, RA, and SP can be safely + // Instructions that don't affect the CFA, RA, and FP can be safely // ignored. return true; } diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index f256e7b..6da97f9 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -322,6 +322,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_LTO); STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_JT_SIZES) STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_CFI_JUMP_TABLE) + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_CALL_GRAPH); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_SFRAME); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH); diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index c3a27c9..f8a84b0 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -744,6 +744,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration( ECase(SHT_LLVM_BB_ADDR_MAP); ECase(SHT_LLVM_OFFLOADING); ECase(SHT_LLVM_LTO); + ECase(SHT_LLVM_CALL_GRAPH); ECase(SHT_GNU_SFRAME); ECase(SHT_GNU_ATTRIBUTES); ECase(SHT_GNU_HASH); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 048c58d..3c9a27a 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -669,7 +669,14 @@ void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) { FAM.registerPass([&] { return buildDefaultAAPipeline(); }); #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ - FAM.registerPass([&] { return CREATE_PASS; }); + if constexpr (std::is_constructible_v< \ + std::remove_reference_t<decltype(CREATE_PASS)>, \ + const TargetMachine &>) { \ + if (TM) \ + FAM.registerPass([&] { return CREATE_PASS; }); \ + } else { \ 
+ FAM.registerPass([&] { return CREATE_PASS; }); \ + } #include "PassRegistry.def" for (auto &C : FunctionAnalysisRegistrationCallbacks) @@ -2038,6 +2045,14 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, } #define FUNCTION_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ + if constexpr (std::is_constructible_v< \ + std::remove_reference_t<decltype(CREATE_PASS)>, \ + const TargetMachine &>) { \ + if (!TM) \ + return make_error<StringError>( \ + formatv("pass '{0}' requires TargetMachine", Name).str(), \ + inconvertibleErrorCode()); \ + } \ MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS)); \ return Error::success(); \ } @@ -2046,6 +2061,18 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ + auto CreatePass = CREATE_PASS; \ + if constexpr (std::is_constructible_v< \ + std::remove_reference_t<decltype(CreatePass( \ + Params.get()))>, \ + const TargetMachine &, \ + std::remove_reference_t<decltype(Params.get())>>) { \ + if (!TM) { \ + return make_error<StringError>( \ + formatv("pass '{0}' requires TargetMachine", Name).str(), \ + inconvertibleErrorCode()); \ + } \ + } \ MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \ return Error::success(); \ } @@ -2152,6 +2179,14 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, } #define FUNCTION_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ + if constexpr (std::is_constructible_v< \ + std::remove_reference_t<decltype(CREATE_PASS)>, \ + const TargetMachine &>) { \ + if (!TM) \ + return make_error<StringError>( \ + formatv("pass '{0}' requires TargetMachine", Name).str(), \ + inconvertibleErrorCode()); \ + } \ CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS)); \ return Error::success(); \ } @@ -2160,6 +2195,18 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return 
Params.takeError(); \ + auto CreatePass = CREATE_PASS; \ + if constexpr (std::is_constructible_v< \ + std::remove_reference_t<decltype(CreatePass( \ + Params.get()))>, \ + const TargetMachine &, \ + std::remove_reference_t<decltype(Params.get())>>) { \ + if (!TM) { \ + return make_error<StringError>( \ + formatv("pass '{0}' requires TargetMachine", Name).str(), \ + inconvertibleErrorCode()); \ + } \ + } \ CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \ return Error::success(); \ } @@ -2239,6 +2286,14 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, // Now expand the basic registered passes from the .inc file. #define FUNCTION_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ + if constexpr (std::is_constructible_v< \ + std::remove_reference_t<decltype(CREATE_PASS)>, \ + const TargetMachine &>) { \ + if (!TM) \ + return make_error<StringError>( \ + formatv("pass '{0}' requires TargetMachine", Name).str(), \ + inconvertibleErrorCode()); \ + } \ FPM.addPass(CREATE_PASS); \ return Error::success(); \ } @@ -2247,14 +2302,34 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ + auto CreatePass = CREATE_PASS; \ + if constexpr (std::is_constructible_v< \ + std::remove_reference_t<decltype(CreatePass( \ + Params.get()))>, \ + const TargetMachine &, \ + std::remove_reference_t<decltype(Params.get())>>) { \ + if (!TM) { \ + return make_error<StringError>( \ + formatv("pass '{0}' requires TargetMachine", Name).str(), \ + inconvertibleErrorCode()); \ + } \ + } \ FPM.addPass(CREATE_PASS(Params.get())); \ return Error::success(); \ } #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ if (Name == "require<" NAME ">") { \ + if constexpr (std::is_constructible_v< \ + std::remove_reference_t<decltype(CREATE_PASS)>, \ + const TargetMachine &>) { \ + if (!TM) \ + return make_error<StringError>( \ + formatv("pass '{0}' requires 
TargetMachine", Name).str(), \ + inconvertibleErrorCode()); \ + } \ FPM.addPass( \ - RequireAnalysisPass< \ - std::remove_reference_t<decltype(CREATE_PASS)>, Function>()); \ + RequireAnalysisPass<std::remove_reference_t<decltype(CREATE_PASS)>, \ + Function>()); \ return Error::success(); \ } \ if (Name == "invalidate<" NAME ">") { \ diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index a66b6e4..1853cdd 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -345,7 +345,7 @@ FUNCTION_ANALYSIS("aa", AAManager()) FUNCTION_ANALYSIS("access-info", LoopAccessAnalysis()) FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis()) FUNCTION_ANALYSIS("bb-sections-profile-reader", - BasicBlockSectionsProfileReaderAnalysis(TM)) + BasicBlockSectionsProfileReaderAnalysis(*TM)) FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis()) FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis()) FUNCTION_ANALYSIS("cycles", CycleAnalysis()) @@ -356,7 +356,7 @@ FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis()) FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis()) FUNCTION_ANALYSIS("ephemerals", EphemeralValuesAnalysis()) FUNCTION_ANALYSIS("func-properties", FunctionPropertiesAnalysis()) -FUNCTION_ANALYSIS("machine-function-info", MachineFunctionAnalysis(TM)) +FUNCTION_ANALYSIS("machine-function-info", MachineFunctionAnalysis(*TM)) FUNCTION_ANALYSIS("gc-function", GCFunctionAnalysis()) FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis()) FUNCTION_ANALYSIS("last-run-tracking", LastRunTrackingAnalysis()) @@ -406,14 +406,14 @@ FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass()) FUNCTION_PASS("annotation-remarks", AnnotationRemarksPass()) FUNCTION_PASS("assume-builder", AssumeBuilderPass()) FUNCTION_PASS("assume-simplify", AssumeSimplifyPass()) -FUNCTION_PASS("atomic-expand", AtomicExpandPass(TM)) +FUNCTION_PASS("atomic-expand", AtomicExpandPass(*TM)) 
FUNCTION_PASS("bdce", BDCEPass()) FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass()) FUNCTION_PASS("callbr-prepare", CallBrPreparePass()) FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) FUNCTION_PASS("chr", ControlHeightReductionPass()) -FUNCTION_PASS("codegenprepare", CodeGenPreparePass(TM)) -FUNCTION_PASS("complex-deinterleaving", ComplexDeinterleavingPass(TM)) +FUNCTION_PASS("codegenprepare", CodeGenPreparePass(*TM)) +FUNCTION_PASS("complex-deinterleaving", ComplexDeinterleavingPass(*TM)) FUNCTION_PASS("consthoist", ConstantHoistingPass()) FUNCTION_PASS("constraint-elimination", ConstraintEliminationPass()) FUNCTION_PASS("coro-elide", CoroElidePass()) @@ -430,10 +430,10 @@ FUNCTION_PASS("dot-dom-only", DomOnlyPrinter()) FUNCTION_PASS("dot-post-dom", PostDomPrinter()) FUNCTION_PASS("dot-post-dom-only", PostDomOnlyPrinter()) FUNCTION_PASS("dse", DSEPass()) -FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass(TM)) +FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass(*TM)) FUNCTION_PASS("drop-unnecessary-assumes", DropUnnecessaryAssumesPass()) -FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass(TM)) -FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM)) +FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass(*TM)) +FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(*TM)) FUNCTION_PASS("expand-reductions", ExpandReductionsPass()) FUNCTION_PASS("extra-vector-passes", ExtraFunctionPassManager<ShouldRunExtraVectorPasses>()) @@ -446,15 +446,15 @@ FUNCTION_PASS("guard-widening", GuardWideningPass()) FUNCTION_PASS("gvn-hoist", GVNHoistPass()) FUNCTION_PASS("gvn-sink", GVNSinkPass()) FUNCTION_PASS("helloworld", HelloWorldPass()) -FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass(TM)) +FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass(*TM)) FUNCTION_PASS("infer-address-spaces", InferAddressSpacesPass()) FUNCTION_PASS("infer-alignment", InferAlignmentPass()) FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings()) 
FUNCTION_PASS("instcount", InstCountPass()) FUNCTION_PASS("instnamer", InstructionNamerPass()) FUNCTION_PASS("instsimplify", InstSimplifyPass()) -FUNCTION_PASS("interleaved-access", InterleavedAccessPass(TM)) -FUNCTION_PASS("interleaved-load-combine", InterleavedLoadCombinePass(TM)) +FUNCTION_PASS("interleaved-access", InterleavedAccessPass(*TM)) +FUNCTION_PASS("interleaved-load-combine", InterleavedLoadCombinePass(*TM)) FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass()) FUNCTION_PASS("irce", IRCEPass()) FUNCTION_PASS("jump-threading", JumpThreadingPass()) @@ -533,25 +533,25 @@ FUNCTION_PASS("reassociate", ReassociatePass()) FUNCTION_PASS("redundant-dbg-inst-elim", RedundantDbgInstEliminationPass()) FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib()) FUNCTION_PASS("reg2mem", RegToMemPass()) -FUNCTION_PASS("safe-stack", SafeStackPass(TM)) +FUNCTION_PASS("safe-stack", SafeStackPass(*TM)) FUNCTION_PASS("sandbox-vectorizer", SandboxVectorizerPass()) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("sccp", SCCPPass()) -FUNCTION_PASS("select-optimize", SelectOptimizePass(TM)) +FUNCTION_PASS("select-optimize", SelectOptimizePass(*TM)) FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass()) FUNCTION_PASS("sink", SinkingPass()) FUNCTION_PASS("sjlj-eh-prepare", SjLjEHPreparePass(TM)) FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) FUNCTION_PASS("slsr", StraightLineStrengthReducePass()) -FUNCTION_PASS("stack-protector", StackProtectorPass(TM)) +FUNCTION_PASS("stack-protector", StackProtectorPass(*TM)) FUNCTION_PASS("strip-gc-relocates", StripGCRelocates()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) FUNCTION_PASS("trigger-crash-function", TriggerCrashFunctionPass()) FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass()) FUNCTION_PASS("tsan", ThreadSanitizerPass()) -FUNCTION_PASS("typepromotion", 
TypePromotionPass(TM)) +FUNCTION_PASS("typepromotion", TypePromotionPass(*TM)) FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass()) FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass()) FUNCTION_PASS("vector-combine", VectorCombinePass()) @@ -730,7 +730,7 @@ FUNCTION_PASS_WITH_PARAMS( FUNCTION_PASS_WITH_PARAMS( "expand-fp", "ExpandFpPass", [TM = TM](CodeGenOptLevel OL) { - return ExpandFpPass(TM, OL); + return ExpandFpPass(*TM, OL); }, parseExpandFpOptions, "O0;O1;O2;O3") diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index 0ecf47d..2715229 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -132,24 +132,70 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) { return std::move(SubPatterns); } +static StringRef maxPlainSubstring(StringRef S) { + StringRef Best; + while (!S.empty()) { + size_t PrefixSize = S.find_first_of("?*[{\\"); + if (PrefixSize == std::string::npos) + PrefixSize = S.size(); + + if (Best.size() < PrefixSize) + Best = S.take_front(PrefixSize); + + S = S.drop_front(PrefixSize); + + // It's impossible, as the first and last characters of the input string + // must be Glob special characters, otherwise they would be parts of + // the prefix or the suffix. + assert(!S.empty()); + + switch (S.front()) { + case '\\': + S = S.drop_front(2); + break; + case '[': { + // Drop '[' and the first character which can be ']'. + S = S.drop_front(2); + size_t EndBracket = S.find_first_of("]"); + // Should not be possible, SubGlobPattern::create should fail on invalid + // pattern before we get here. + assert(EndBracket != std::string::npos); + S = S.drop_front(EndBracket + 1); + break; + } + case '{': + // TODO: implement. + // Fallback to whatever is best for now. 
+ return Best; + default: + S = S.drop_front(1); + } + } + + return Best; +} + Expected<GlobPattern> GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) { GlobPattern Pat; + Pat.Pattern = S; // Store the prefix that does not contain any metacharacter. - size_t PrefixSize = S.find_first_of("?*[{\\"); - Pat.Prefix = S.substr(0, PrefixSize); - if (PrefixSize == std::string::npos) + Pat.PrefixSize = S.find_first_of("?*[{\\"); + if (Pat.PrefixSize == std::string::npos) { + Pat.PrefixSize = S.size(); return Pat; - S = S.substr(PrefixSize); + } + S = S.substr(Pat.PrefixSize); // Just in case we stop on unmatched opening brackets. size_t SuffixStart = S.find_last_of("?*[]{}\\"); assert(SuffixStart != std::string::npos); if (S[SuffixStart] == '\\') ++SuffixStart; - ++SuffixStart; - Pat.Suffix = S.substr(SuffixStart); + if (SuffixStart < S.size()) + ++SuffixStart; + Pat.SuffixSize = S.size() - SuffixStart; S = S.substr(0, SuffixStart); SmallVector<std::string, 1> SubPats; @@ -199,10 +245,15 @@ GlobPattern::SubGlobPattern::create(StringRef S) { return Pat; } +StringRef GlobPattern::longest_substr() const { + return maxPlainSubstring( + Pattern.drop_front(PrefixSize).drop_back(SuffixSize)); +} + bool GlobPattern::match(StringRef S) const { - if (!S.consume_front(Prefix)) + if (!S.consume_front(prefix())) return false; - if (!S.consume_back(Suffix)) + if (!S.consume_back(suffix())) return false; if (SubGlobs.empty() && S.empty()) return true; diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index f74e52a..c27f627 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -89,14 +89,32 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) { return A.Name.size() < B.Name.size(); }); } + + for (const auto &G : reverse(Globs)) { + StringRef Prefix = G.Pattern.prefix(); + + auto &V = PrefixToGlob.emplace(Prefix).first->second; + V.emplace_back(&G); + } } void 
SpecialCaseList::GlobMatcher::match( StringRef Query, llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { - for (const auto &G : reverse(Globs)) - if (G.Pattern.match(Query)) - return Cb(G.Name, G.LineNo); + if (!PrefixToGlob.empty()) { + for (const auto &[_, V] : PrefixToGlob.find_prefixes(Query)) { + for (const auto *G : V) { + if (G->Pattern.match(Query)) { + Cb(G->Name, G->LineNo); + // As soon as we find a match in the vector, we can break for this + // vector, since the globs are already sorted by priority within the + // prefix group. However, we continue searching other prefix groups in + // the map, as they may contain a better match overall. + break; + } + } + } + } } SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash) diff --git a/llvm/lib/Support/Timer.cpp b/llvm/lib/Support/Timer.cpp index 67483ba..9d45096 100644 --- a/llvm/lib/Support/Timer.cpp +++ b/llvm/lib/Support/Timer.cpp @@ -240,7 +240,8 @@ private: getGroupEntry(StringRef GroupName, StringRef GroupDescription) { std::pair<TimerGroup *, Name2TimerMap> &GroupEntry = Map[GroupName]; if (!GroupEntry.first) - GroupEntry.first = new TimerGroup(GroupName, GroupDescription); + GroupEntry.first = + new TimerGroup(GroupName, GroupDescription, /*PrintOnExit=*/true); return GroupEntry; } @@ -270,9 +271,10 @@ TimerGroup &NamedRegionTimer::getNamedTimerGroup(StringRef GroupName, static TimerGroup *TimerGroupList = nullptr; TimerGroup::TimerGroup(StringRef Name, StringRef Description, - sys::SmartMutex<true> &lock) + sys::SmartMutex<true> &lock, bool PrintOnExit) : Name(Name.begin(), Name.end()), - Description(Description.begin(), Description.end()) { + Description(Description.begin(), Description.end()), + PrintOnExit(PrintOnExit) { // Add the group to TimerGroupList. 
sys::SmartScopedLock<true> L(lock); if (TimerGroupList) @@ -282,12 +284,12 @@ TimerGroup::TimerGroup(StringRef Name, StringRef Description, TimerGroupList = this; } -TimerGroup::TimerGroup(StringRef Name, StringRef Description) - : TimerGroup(Name, Description, timerLock()) {} +TimerGroup::TimerGroup(StringRef Name, StringRef Description, bool PrintOnExit) + : TimerGroup(Name, Description, timerLock(), PrintOnExit) {} TimerGroup::TimerGroup(StringRef Name, StringRef Description, - const StringMap<TimeRecord> &Records) - : TimerGroup(Name, Description) { + const StringMap<TimeRecord> &Records, bool PrintOnExit) + : TimerGroup(Name, Description, PrintOnExit) { TimersToPrint.reserve(Records.size()); for (const auto &P : Records) TimersToPrint.emplace_back(P.getValue(), std::string(P.getKey()), @@ -301,7 +303,7 @@ TimerGroup::~TimerGroup() { while (FirstTimer) removeTimer(*FirstTimer); - if (!TimersToPrint.empty()) { + if (!TimersToPrint.empty() && PrintOnExit) { std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile(); PrintQueuedTimers(*OutStream); } @@ -530,7 +532,7 @@ public: sys::SmartMutex<true> TimerLock; TimerGroup DefaultTimerGroup{"misc", "Miscellaneous Ungrouped Timers", - TimerLock}; + TimerLock, /*PrintOnExit=*/true}; SignpostEmitter Signposts; // Order of these members and initialization below is important. 
For example diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 86f9548..a4529a5 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -73,9 +73,16 @@ def SVEUnsupported : AArch64Unsupported { SVE2Unsupported.F); } -let F = [HasSME2p2, HasSVE2p2_or_SME2p2, HasNonStreamingSVE_or_SME2p2, - HasNonStreamingSVE2p2_or_SME2p2] in -def SME2p2Unsupported : AArch64Unsupported; +def SME2p3Unsupported : AArch64Unsupported { + let F = [HasSVE2p3_or_SME2p3, HasSVE_B16MM]; +} + +def SME2p2Unsupported : AArch64Unsupported { + let F = !listconcat([HasSME2p2, HasSVE2p2_or_SME2p2, + HasNonStreamingSVE_or_SME2p2, + HasNonStreamingSVE2p2_or_SME2p2], + SME2p3Unsupported.F); +} def SME2p1Unsupported : AArch64Unsupported { let F = !listconcat([HasSME2p1, HasSVE2p1_or_SME2p1, diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index ecaeff7..b3ec65c 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -71,7 +71,6 @@ def AArch64PreLegalizerCombiner: GICombiner< "AArch64PreLegalizerCombinerImpl", [all_combines, icmp_redundant_trunc, fold_global_offset, - shuffle_to_extract, ext_addv_to_udot_addv, ext_uaddv_to_uaddlv, push_sub_through_zext, diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index 46f5f0c..0e94b78 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -585,6 +585,47 @@ def FeatureSME_TMOP: ExtensionWithMArch<"sme-tmop", "SME_TMOP", "FEAT_SME_TMOP", def FeatureSSVE_FEXPA : ExtensionWithMArch<"ssve-fexpa", "SSVE_FEXPA", "FEAT_SSVE_FEXPA", "Enable SVE FEXPA instruction in Streaming SVE mode", [FeatureSME2]>; +//===----------------------------------------------------------------------===// +// Armv9.7 Architecture Extensions +//===----------------------------------------------------------------------===// 
+ +def FeatureCMH : ExtensionWithMArch<"cmh", "CMH", "FEAT_CMH", + "Enable Armv9.7-A Contention Management Hints">; + +def FeatureLSCP : ExtensionWithMArch<"lscp", "LSCP", "FEAT_LSCP", + "Enable Armv9.7-A Load-acquire and store-release pair extension">; + +def FeatureTLBID: ExtensionWithMArch<"tlbid", "TLBID", "FEAT_TLBID", + "Enable Armv9.7-A TLBI Domains extension">; + +def FeatureMPAMv2: ExtensionWithMArch<"mpamv2", "MPAMv2", "FEAT_MPAMv2", + "Enable Armv9.7-A MPAMv2 Lookaside Buffer Invalidate instructions">; + +def FeatureMTETC: ExtensionWithMArch<"mtetc", "MTETC", "FEAT_MTETC", + "Enable Virtual Memory Tagging Extension">; + +def FeatureGCIE: ExtensionWithMArch<"gcie", "GCIE", "FEAT_GCIE", + "Enable GICv5 (Generic Interrupt Controller) CPU Interface Extension">; + +def FeatureSVE2p3 : ExtensionWithMArch<"sve2p3", "SVE2p3", "FEAT_SVE2p3", + "Enable Armv9.7-A Scalable Vector Extension 2.3 instructions", [FeatureSVE2p2]>; + +def FeatureSME2p3 : ExtensionWithMArch<"sme2p3", "SME2p3", "FEAT_SME2p3", + "Enable Armv9.7-A Scalable Matrix Extension 2.3 instructions", [FeatureSME2p2]>; + +def FeatureSVE_B16MM : ExtensionWithMArch<"sve-b16mm", "SVE_B16MM", "FEAT_SVE_B16MM", + "Enable Armv9.7-A SVE non-widening BFloat16 matrix multiply-accumulate", [FeatureSVE]>; + +def FeatureF16MM : ExtensionWithMArch<"f16mm", "F16MM", "FEAT_F16MM", + "Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate", [FeatureFullFP16]>; + +def FeatureF16F32DOT : ExtensionWithMArch<"f16f32dot", "F16F32DOT", "FEAT_F16F32DOT", + "Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>; + +def FeatureF16F32MM : ExtensionWithMArch<"f16f32mm", "F16F32MM", "FEAT_F16F32MM", + "Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>; + +//===----------------------------------------------------------------------===// // Other Features 
//===----------------------------------------------------------------------===// @@ -939,9 +980,12 @@ def HasV9_5aOps : Architecture64<9, 5, "a", "v9.5a", [HasV9_4aOps, FeatureCPA], !listconcat(HasV9_4aOps.DefaultExts, [FeatureCPA, FeatureLUT, FeatureFAMINMAX])>; def HasV9_6aOps : Architecture64<9, 6, "a", "v9.6a", - [HasV9_5aOps, FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2, FeatureLSUI, FeatureOCCMO], - !listconcat(HasV9_5aOps.DefaultExts, [FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2, + [HasV9_5aOps, FeatureCMPBR, FeatureLSUI, FeatureOCCMO], + !listconcat(HasV9_5aOps.DefaultExts, [FeatureCMPBR, FeatureLSUI, FeatureOCCMO])>; +def HasV9_7aOps : Architecture64<9, 7, "a", "v9.7a", + [HasV9_6aOps, FeatureSVE2p3, FeatureFPRCVT], + !listconcat(HasV9_6aOps.DefaultExts, [FeatureSVE2p3, FeatureFPRCVT])>; def HasV8_0rOps : Architecture64<8, 0, "r", "v8r", [ //v8.1 FeatureCRC, FeaturePAN, FeatureLSE, FeatureCONTEXTIDREL2, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index a81de5c..d16b116 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9002,12 +9002,12 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI, } static SMECallAttrs -getSMECallAttrs(const Function &Caller, const AArch64TargetLowering &TLI, +getSMECallAttrs(const Function &Caller, const RTLIB::RuntimeLibcallsInfo &RTLCI, const TargetLowering::CallLoweringInfo &CLI) { if (CLI.CB) - return SMECallAttrs(*CLI.CB, &TLI); + return SMECallAttrs(*CLI.CB, &RTLCI); if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) - return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol(), TLI)); + return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol(), RTLCI)); return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(SMEAttrs::Normal)); } @@ -9029,7 +9029,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( // SME Streaming functions are not eligible for TCO as they 
may require // the streaming mode or ZA to be restored after returning from the call. - SMECallAttrs CallAttrs = getSMECallAttrs(CallerF, *this, CLI); + SMECallAttrs CallAttrs = + getSMECallAttrs(CallerF, getRuntimeLibcallsInfo(), CLI); if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() || CallAttrs.requiresPreservingAllZAState() || CallAttrs.caller().hasStreamingBody()) @@ -9454,7 +9455,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } // Determine whether we need any streaming mode changes. - SMECallAttrs CallAttrs = getSMECallAttrs(MF.getFunction(), *this, CLI); + SMECallAttrs CallAttrs = + getSMECallAttrs(MF.getFunction(), getRuntimeLibcallsInfo(), CLI); std::optional<unsigned> ZAMarkerNode; bool UseNewSMEABILowering = getTM().useNewSMEABILowering(); @@ -19476,6 +19478,61 @@ static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) { Op1 ? Op1 : Mul->getOperand(1)); } +// Multiplying an RDSVL value by a constant can sometimes be done cheaper by +// folding a power-of-two factor of the constant into the RDSVL immediate and +// compensating with an extra shift. +// +// We rewrite: +// (mul (srl (rdsvl 1), w), x) +// to one of: +// (shl (rdsvl y), z) if z > 0 +// (srl (rdsvl y), abs(z)) if z < 0 +// where integers y, z satisfy x = y * 2^(w + z) and y ∈ [-32, 31]. 
+static SDValue performMulRdsvlCombine(SDNode *Mul, SelectionDAG &DAG) {
+  // Expected shape: Mul = (mul (srl (rdsvl 1), w), x) with constant w and x.
+  // Returns the replacement value, or an empty SDValue when the pattern does
+  // not match or no profitable/valid rewrite exists.
+  SDValue MulOp0 = Mul->getOperand(0);
+  if (MulOp0->getOpcode() != ISD::SRL)
+    return SDValue();
+
+  // The algebra below assumes the RDSVL immediate is exactly 1. Any other
+  // immediate (for instance one produced by an earlier run of this combine)
+  // would otherwise be silently dropped from the product.
+  SDValue RdsvlOp = MulOp0->getOperand(0);
+  if (RdsvlOp.getOpcode() != AArch64ISD::RDSVL ||
+      !isOneConstant(RdsvlOp.getOperand(0)))
+    return SDValue();
+
+  // Both the multiplier and the shift amount must be compile-time constants.
+  // Use dyn_cast rather than cast: a variable shift amount is perfectly legal
+  // in the DAG and must not trip an assertion here.
+  auto *MulC = dyn_cast<ConstantSDNode>(Mul->getOperand(1));
+  auto *ShiftC = dyn_cast<ConstantSDNode>(MulOp0->getOperand(1));
+  if (!MulC || !ShiftC)
+    return SDValue();
+
+  EVT VT = Mul->getValueType(0);
+  const APInt &ShiftVal = ShiftC->getAPIntValue();
+  if (ShiftVal.uge(VT.getScalarSizeInBits()))
+    return SDValue(); // Over-wide shift: the SRL result is poison anyway.
+  int OperandShift = static_cast<int>(ShiftVal.getZExtValue());
+
+  // (srl (rdsvl 1), w) equals SVL / 2^w only if no set bit is shifted out.
+  // SVL in bytes is a power of two that is at least 16, so this is guaranteed
+  // for w <= 4; for a larger w we rely on the shift being marked exact.
+  constexpr int MinSVLBytesLog2 = 4;
+  if (OperandShift > MinSVLBytesLog2 && !MulOp0->getFlags().hasExact())
+    return SDValue();
+
+  // Only multipliers that fit in 32 signed bits are handled (the bit helpers
+  // used below are 32-bit); truncating a wider constant would change the
+  // value being multiplied. Zero makes every bound degenerate, so skip it.
+  const APInt &MulVal = MulC->getAPIntValue();
+  if (MulVal.isZero() || !MulVal.isSignedIntN(32))
+    return SDValue();
+  int64_t ConstMultiplier = MulVal.getSExtValue();
+  bool IsNegative = ConstMultiplier < 0;
+  // Negate in 64 bits so that INT32_MIN has a representable magnitude.
+  uint32_t AbsConstValue =
+      static_cast<uint32_t>(IsNegative ? -ConstMultiplier : ConstMultiplier);
+
+  // z <= ctz(|x|) - w (largest extra shift we can take while keeping y
+  // integral).
+  int UpperBound =
+      static_cast<int>(llvm::countr_zero(AbsConstValue)) - OperandShift;
+
+  // To keep y in range, with B = 31 for x > 0 and B = 32 for x < 0, we need:
+  //   2^(w + z) >= ceil(|x| / B)  =>  z >= ceil_log2(ceil(|x| / B)) - w.
+  uint32_t B = IsNegative ? 32 : 31;
+  uint32_t CeilAxOverB = (AbsConstValue + (B - 1)) / B; // ceil(|x| / B)
+  int LowerBound =
+      static_cast<int>(llvm::Log2_32_Ceil(CeilAxOverB)) - OperandShift;
+
+  // No valid solution found.
+  if (LowerBound > UpperBound)
+    return SDValue();
+
+  // Any value of z in [LowerBound, UpperBound] is valid. Prefer no extra
+  // shift if possible.
+  int Shift = std::min(std::max(/*prefer*/ 0, LowerBound), UpperBound);
+
+  // y = x / 2^(w + z). Since LowerBound >= -w and UpperBound <= ctz(|x|) - w,
+  // the shift amount w + z lies in [0, 31] and is always in range.
+  int32_t RdsvlMul =
+      static_cast<int32_t>(AbsConstValue >> (OperandShift + Shift));
+  if (IsNegative)
+    RdsvlMul = -RdsvlMul;
+
+  SDLoc DL(Mul);
+  SDValue Rdsvl = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
+                              DAG.getSignedConstant(RdsvlMul, DL, MVT::i32));
+
+  if (Shift == 0)
+    return Rdsvl;
+  return DAG.getNode(Shift < 0 ? ISD::SRL : ISD::SHL, DL, VT, Rdsvl,
+                     DAG.getConstant(Shift < 0 ? -Shift : Shift, DL, MVT::i32),
+                     SDNodeFlags::Exact);
+}
+
 // Combine v4i32 Mul(And(Srl(X, 15), 0x10001), 0xffff) -> v8i16 CMLTz
 // Same for other types with equivalent constants.
static SDValue performMulVectorCmpZeroCombine(SDNode *N, SelectionDAG &DAG) { @@ -19604,6 +19661,9 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, if (!isa<ConstantSDNode>(N1)) return SDValue(); + if (SDValue Ext = performMulRdsvlCombine(N, DAG)) + return Ext; + ConstantSDNode *C = cast<ConstantSDNode>(N1); const APInt &ConstValue = C->getAPIntValue(); @@ -26665,11 +26725,34 @@ static SDValue performDUPCombine(SDNode *N, } if (N->getOpcode() == AArch64ISD::DUP) { + SDValue Op = N->getOperand(0); + + // Optimize DUP(extload/zextload i8/i16/i32) to avoid GPR->FPR transfer. + // For example: + // v4i32 = DUP (i32 (zextloadi8 addr)) + // => + // v4i32 = SCALAR_TO_VECTOR (i32 (zextloadi8 addr)) ; Matches to ldr b0 + // v4i32 = DUPLANE32 (v4i32), 0 + if (auto *LD = dyn_cast<LoadSDNode>(Op)) { + ISD::LoadExtType ExtType = LD->getExtensionType(); + EVT MemVT = LD->getMemoryVT(); + EVT ElemVT = VT.getVectorElementType(); + if ((ExtType == ISD::EXTLOAD || ExtType == ISD::ZEXTLOAD) && + (MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) && + ElemVT != MemVT && LD->hasOneUse()) { + EVT Vec128VT = EVT::getVectorVT(*DCI.DAG.getContext(), ElemVT, + 128 / ElemVT.getSizeInBits()); + SDValue ScalarToVec = + DCI.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, Vec128VT, Op); + return DCI.DAG.getNode(getDUPLANEOp(ElemVT), DL, VT, ScalarToVec, + DCI.DAG.getConstant(0, DL, MVT::i64)); + } + } + // If the instruction is known to produce a scalar in SIMD registers, we can // duplicate it across the vector lanes using DUPLANE instead of moving it // to a GPR first. For example, this allows us to handle: // v4i32 = DUP (i32 (FCMGT (f32, f32))) - SDValue Op = N->getOperand(0); // FIXME: Ideally, we should be able to handle all instructions that // produce a scalar value in FPRs. 
if (Op.getOpcode() == AArch64ISD::FCMEQ || @@ -29430,15 +29513,6 @@ void AArch64TargetLowering::insertSSPDeclarations(Module &M) const { TargetLowering::insertSSPDeclarations(M); } -Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const { - // MSVC CRT has a function to validate security cookie. - RTLIB::LibcallImpl SecurityCheckCookieLibcall = - getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE); - if (SecurityCheckCookieLibcall != RTLIB::Unsupported) - return M.getFunction(getLibcallImplName(SecurityCheckCookieLibcall)); - return TargetLowering::getSSPStackGuardCheck(M); -} - Value * AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const { // Android provides a fixed TLS slot for the SafeStack pointer. See the @@ -29447,11 +29521,6 @@ AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const { if (Subtarget->isTargetAndroid()) return UseTlsOffset(IRB, 0x48); - // Fuchsia is similar. - // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value. 
- if (Subtarget->isTargetFuchsia()) - return UseTlsOffset(IRB, -0x8); - return TargetLowering::getSafeStackPointerLocation(IRB); } @@ -29769,7 +29838,7 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const { // Checks to allow the use of SME instructions if (auto *Base = dyn_cast<CallBase>(&Inst)) { - auto CallAttrs = SMECallAttrs(*Base, this); + auto CallAttrs = SMECallAttrs(*Base, &getRuntimeLibcallsInfo()); if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() || CallAttrs.requiresPreservingZT0() || CallAttrs.requiresPreservingAllZAState()) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 9495c9f..2cb8ed2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -366,7 +366,6 @@ public: Value *getIRStackGuard(IRBuilderBase &IRB) const override; void insertSSPDeclarations(Module &M) const override; - Function *getSSPStackGuardCheck(const Module &M) const override; /// If the target has a standard location for the unsafe stack pointer, /// returns the address of that location. Otherwise, returns nullptr. 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 09ce713..58a53af 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1894,6 +1894,21 @@ def btihint_op : Operand<i32> { }]; } +def CMHPriorityHintOperand : AsmOperandClass { + let Name = "CMHPriorityHint"; + let ParserMethod = "tryParseCMHPriorityHint"; +} + +def CMHPriorityHint_op : Operand<i32> { + let ParserMatchClass = CMHPriorityHintOperand; + let PrintMethod = "printCMHPriorityHintOp"; + let MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + return AArch64CMHPriorityHint::lookupCMHPriorityHintByEncoding(MCOp.getImm()) != nullptr; + }]; +} + class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg), "mrs", "\t$Rt, $systemreg"> { bits<16> systemreg; @@ -4636,6 +4651,48 @@ multiclass StorePairOffset<bits<2> opc, bit V, RegisterOperand regtype, GPR64sp:$Rn, 0)>; } +class BaseLoadStoreAcquirePairOffset<bits<4> opc, bit L, dag oops, dag iops, + string asm> + : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, #0]", "", []> { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + let Inst{31-23} = 0b110110010; + let Inst{22} = L; + let Inst{21} = 0b0; + let Inst{20-16} = Rt2; + let Inst{15-12} = opc; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass LoadAcquirePairOffset<bits<4> opc, string asm> { + let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in + def i : BaseLoadStoreAcquirePairOffset<opc, 0b1, + (outs GPR64:$Rt, GPR64:$Rt2), + (ins GPR64sp:$Rn), asm>, + Sched<[WriteAtomic, WriteLDHi]>; + + def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]", + (!cast<Instruction>(NAME # "i") GPR64:$Rt, GPR64:$Rt2, + GPR64sp:$Rn)>; +} + + +multiclass StoreAcquirePairOffset<bits<4> opc, string asm> { + let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in + def i : BaseLoadStoreAcquirePairOffset<opc, 0b0, (outs), + (ins GPR64:$Rt, GPR64:$Rt2, + GPR64sp:$Rn), + asm>, 
+ Sched<[WriteSTP]>; + + def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]", + (!cast<Instruction>(NAME # "i") GPR64:$Rt, GPR64:$Rt2, + GPR64sp:$Rn)>; +} + // (pre-indexed) class BaseLoadStorePairPreIdx<bits<2> opc, bit V, bit L, dag oops, dag iops, string asm> @@ -5241,7 +5298,7 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm, } multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm, - SDPatternOperator OpN = null_frag> { + SDPatternOperator OpN> { // double-precision to 32-bit SIMD/FPR def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm, [(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> { @@ -6481,8 +6538,7 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm, } multiclass SIMDThreeSameVectorMLA<bit Q, string asm, SDPatternOperator op> { - - def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b", + def v16i8_v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b", V128, v8f16, v16i8, op>; } @@ -6491,6 +6547,23 @@ multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm, SDPatternOpera V128, v4f32, v16i8, op>; } +multiclass SIMDThreeSameVectorFMLA<string asm> { + def v8f16_v8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h", + V128, v8f16, v8f16, null_frag>; +} + +multiclass SIMDThreeSameVectorFMLAWiden<string asm> { + def v8f16_v4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h", + V128, v4f32, v8f16, null_frag>; +} + +multiclass SIMDThreeSameVectorFDot<string asm, SDPatternOperator OpNode = null_frag> { + def v4f16_v2f32 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64, + v2f32, v4f16, OpNode>; + def v8f16_v4f32 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128, + v4f32, v8f16, OpNode>; +} + // FP8 assembly/disassembly classes //---------------------------------------------------------------------------- @@ -9112,6 +9185,13 @@ 
multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm, V128, V128_lo, v4f32, v8f16, VectorIndexH, OpNode>; } +multiclass SIMDThreeSameVectorFDOTIndex<string asm> { + def v4f16_v2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h", + V64, v2f32, v4f16, VectorIndexS, null_frag>; + def v8f16_v4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h", + V128, v4f32, v8f16, VectorIndexS, null_frag>; +} + //---------------------------------------------------------------------------- // FP8 Advanced SIMD vector x indexed element multiclass SIMD_FP8_Dot2_Index<string asm, SDPatternOperator op> { @@ -13227,3 +13307,34 @@ multiclass SIMDThreeSameVectorFP8MatrixMul<string asm>{ let Predicates = [HasNEON, HasF8F32MM]; } } + +//---------------------------------------------------------------------------- +// Contention Management Hints - FEAT_CMH +//---------------------------------------------------------------------------- + +class SHUHInst<string asm> : I< + (outs), + (ins CMHPriorityHint_op:$priority), + asm, "\t$priority", "", []>, Sched<[]> { + bits<1> priority; + let Inst{31-12} = 0b11010101000000110010; + let Inst{11-8} = 0b0110; + let Inst{7-6} = 0b01; + let Inst{5} = priority; + let Inst{4-0} = 0b11111; +} + +multiclass SHUH<string asm> { + def NAME : SHUHInst<asm>; + def : InstAlias<asm, (!cast<Instruction>(NAME) 0), 1>; +} + +class STCPHInst<string asm> : I< + (outs), + (ins), + asm, "", "", []>, Sched<[]> { + let Inst{31-12} = 0b11010101000000110010; + let Inst{11-8} = 0b0110; + let Inst{7-5} = 0b100; + let Inst{4-0} = 0b11111; +} diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index d5117da..457e540 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5151,7 +5151,15 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // GPR32 zeroing if 
(AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) { - if (Subtarget.hasZeroCycleZeroingGPR32()) { + if (Subtarget.hasZeroCycleZeroingGPR64() && + !Subtarget.hasZeroCycleZeroingGPR32()) { + MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); + assert(DestRegX.isValid() && "Destination super-reg not valid"); + BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestRegX) + .addImm(0) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + } else if (Subtarget.hasZeroCycleZeroingGPR32()) { BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) .addImm(0) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index f788c75..b9e299e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -50,63 +50,44 @@ def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">, AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">; def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">, AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">; - def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">, - AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">; - + AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">; def HasEL3 : Predicate<"Subtarget->hasEL3()">, - AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">; - + AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">; def HasVH : Predicate<"Subtarget->hasVH()">, - AssemblerPredicateWithAll<(all_of FeatureVH), "vh">; - + AssemblerPredicateWithAll<(all_of FeatureVH), "vh">; def HasLOR : Predicate<"Subtarget->hasLOR()">, - AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">; - + AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">; def HasPAuth : Predicate<"Subtarget->hasPAuth()">, - AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">; - + AssemblerPredicateWithAll<(all_of FeaturePAuth), 
"pauth">; def HasPAuthLR : Predicate<"Subtarget->hasPAuthLR()">, - AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">; - + AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">; def HasJS : Predicate<"Subtarget->hasJS()">, - AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">; - + AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">; def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">, - AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">; - -def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">, - AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">; - + AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">; +def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">, + AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">; def HasNV : Predicate<"Subtarget->hasNV()">, - AssemblerPredicateWithAll<(all_of FeatureNV), "nv">; - + AssemblerPredicateWithAll<(all_of FeatureNV), "nv">; def HasMPAM : Predicate<"Subtarget->hasMPAM()">, - AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">; - + AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">; def HasDIT : Predicate<"Subtarget->hasDIT()">, - AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">; - -def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">, - AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">; - + AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">; +def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">, + AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">; def HasAM : Predicate<"Subtarget->hasAM()">, - AssemblerPredicateWithAll<(all_of FeatureAM), "am">; - + AssemblerPredicateWithAll<(all_of FeatureAM), "am">; def HasSEL2 : Predicate<"Subtarget->hasSEL2()">, - AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">; - -def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">, - AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">; - + AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">; 
+def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">, + AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">; def HasFlagM : Predicate<"Subtarget->hasFlagM()">, - AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">; - -def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">, - AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">; - + AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">; +def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">, + AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">; def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, - AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">; + AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">; def HasNEON : Predicate<"Subtarget->isNeonAvailable()">, AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">; def HasSM4 : Predicate<"Subtarget->hasSM4()">, @@ -149,13 +130,13 @@ def HasSVE2 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasS AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">; def HasSVE2p1 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()">, AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">; -def HasSVEAES : Predicate<"Subtarget->hasSVEAES()">, +def HasSVEAES : Predicate<"Subtarget->hasSVEAES()">, AssemblerPredicateWithAll<(all_of FeatureSVEAES), "sve-aes">; -def HasSVESM4 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVESM4()">, +def HasSVESM4 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVESM4()">, AssemblerPredicateWithAll<(all_of FeatureSVESM4), "sve-sm4">; -def HasSVESHA3 : Predicate<"Subtarget->hasSVESHA3()">, +def HasSVESHA3 : Predicate<"Subtarget->hasSVESHA3()">, AssemblerPredicateWithAll<(all_of FeatureSVESHA3), "sve-sha3">; -def HasSVEBitPerm : Predicate<"Subtarget->hasSVEBitPerm()">, +def HasSVEBitPerm : Predicate<"Subtarget->hasSVEBitPerm()">, AssemblerPredicateWithAll<(all_of FeatureSVEBitPerm), "sve-bitperm">; def 
HasSMEandIsNonStreamingSafe : Predicate<"Subtarget->hasSME()">, @@ -196,7 +177,7 @@ def HasSSVE_FP8DOT2 : Predicate<"Subtarget->hasSSVE_FP8DOT2() || " "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">, AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2, (all_of FeatureSVE2, FeatureFP8DOT2)), - "ssve-fp8dot2 or (sve2 and fp8dot2)">; + "ssve-fp8dot2 or (sve2 and fp8dot2)">; def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">, AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">; def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || " @@ -204,43 +185,60 @@ def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || " AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4, (all_of FeatureSVE2, FeatureFP8DOT4)), "ssve-fp8dot4 or (sve2 and fp8dot4)">; -def HasLUT : Predicate<"Subtarget->hasLUT()">, +def HasLUT : Predicate<"Subtarget->hasLUT()">, AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">; -def HasSME_LUTv2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">, +def HasSME_LUTv2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">, AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">; -def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">, +def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">, AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">; -def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">, +def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">, AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">; -def HasSME_MOP4 : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_MOP4())">, +def HasSME_MOP4 : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_MOP4())">, AssemblerPredicateWithAll<(all_of FeatureSME_MOP4), "sme-mop4">; -def HasSME_TMOP : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_TMOP())">, +def 
HasSME_TMOP : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_TMOP())">, AssemblerPredicateWithAll<(all_of FeatureSME_TMOP), "sme-tmop">; - -def HasCMPBR : Predicate<"Subtarget->hasCMPBR()">, +def HasCMPBR : Predicate<"Subtarget->hasCMPBR()">, AssemblerPredicateWithAll<(all_of FeatureCMPBR), "cmpbr">; -def HasF8F32MM : Predicate<"Subtarget->hasF8F32MM()">, +def HasF8F32MM : Predicate<"Subtarget->hasF8F32MM()">, AssemblerPredicateWithAll<(all_of FeatureF8F32MM), "f8f32mm">; -def HasF8F16MM : Predicate<"Subtarget->hasF8F16MM()">, +def HasF8F16MM : Predicate<"Subtarget->hasF8F16MM()">, AssemblerPredicateWithAll<(all_of FeatureF8F16MM), "f8f16mm">; -def HasFPRCVT : Predicate<"Subtarget->hasFPRCVT()">, +def HasFPRCVT : Predicate<"Subtarget->hasFPRCVT()">, AssemblerPredicateWithAll<(all_of FeatureFPRCVT), "fprcvt">; -def HasLSFE : Predicate<"Subtarget->hasLSFE()">, +def HasLSFE : Predicate<"Subtarget->hasLSFE()">, AssemblerPredicateWithAll<(all_of FeatureLSFE), "lsfe">; -def HasSME2p2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p2()">, +def HasSME2p2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p2()">, AssemblerPredicateWithAll<(all_of FeatureSME2p2), "sme2p2">; -def HasSVEAES2 : Predicate<"Subtarget->hasSVEAES2()">, +def HasSVEAES2 : Predicate<"Subtarget->hasSVEAES2()">, AssemblerPredicateWithAll<(all_of FeatureSVEAES2), "sve-aes2">; -def HasSVEBFSCALE : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEBFSCALE()">, +def HasSVEBFSCALE : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEBFSCALE()">, AssemblerPredicateWithAll<(all_of FeatureSVEBFSCALE), "sve-bfscale">; -def HasSVE_F16F32MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_F16F32MM()">, +def HasSVE_F16F32MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_F16F32MM()">, AssemblerPredicateWithAll<(all_of FeatureSVE_F16F32MM), "sve-f16f32mm">; def HasPCDPHINT : Predicate<"Subtarget->hasPCDPHINT()">, - 
AssemblerPredicateWithAll<(all_of FeaturePCDPHINT), "pcdphint">; + AssemblerPredicateWithAll<(all_of FeaturePCDPHINT), "pcdphint">; def HasLSUI : Predicate<"Subtarget->hasLSUI()">, - AssemblerPredicateWithAll<(all_of FeatureLSUI), "lsui">; + AssemblerPredicateWithAll<(all_of FeatureLSUI), "lsui">; def HasOCCMO : Predicate<"Subtarget->hasOCCMO()">, - AssemblerPredicateWithAll<(all_of FeatureOCCMO), "occmo">; + AssemblerPredicateWithAll<(all_of FeatureOCCMO), "occmo">; +def HasCMH : Predicate<"Subtarget->hasCMH()">, + AssemblerPredicateWithAll<(all_of FeatureCMH), "cmh">; +def HasLSCP : Predicate<"Subtarget->hasLSCP()">, + AssemblerPredicateWithAll<(all_of FeatureLSCP), "lscp">; +def HasSVE2p2 : Predicate<"Subtarget->hasSVE2p2()">, + AssemblerPredicateWithAll<(all_of FeatureSVE2p2), "sve2p2">; +def HasSVE_B16MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_B16MM()">, + AssemblerPredicateWithAll<(all_of FeatureSVE_B16MM), "sve-b16mm">; +def HasF16MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasF16MM()">, + AssemblerPredicateWithAll<(all_of FeatureF16MM), "f16mm">; +def HasSVE2p3 : Predicate<"Subtarget->hasSVE2p3()">, + AssemblerPredicateWithAll<(all_of FeatureSVE2p3), "sve2p3">; +def HasSME2p3 : Predicate<"Subtarget->hasSME2p3()">, + AssemblerPredicateWithAll<(all_of FeatureSME2p3), "sme2p3">; +def HasF16F32DOT : Predicate<"Subtarget->hasF16F32DOT()">, + AssemblerPredicateWithAll<(all_of FeatureF16F32DOT), "f16f32dot">; +def HasF16F32MM : Predicate<"Subtarget->hasF16F32MM()">, + AssemblerPredicateWithAll<(all_of FeatureF16F32MM), "f16f32mm">; // A subset of SVE(2) instructions are legal in Streaming SVE execution mode, // they should be enabled if either has been specified. 
@@ -310,6 +308,10 @@ def HasSVE2p2_or_SME2p2 : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2())">, AssemblerPredicateWithAll<(any_of FeatureSME2p2, FeatureSVE2p2), "sme2p2 or sve2p2">; +def HasSVE2p3_or_SME2p3 + : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p3() || Subtarget->hasSME2p3())">, + AssemblerPredicateWithAll<(any_of FeatureSME2p3, FeatureSVE2p3), + "sme2p3 or sve2p3">; def HasNonStreamingSVE2p2_or_SME2p2 : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2p2()) ||" "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">, @@ -328,100 +330,110 @@ def HasNEONandIsStreamingSafe AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">; // A subset of NEON instructions are legal in Streaming SVE mode only with +sme2p2. def HasNEONandIsSME2p2StreamingSafe - : Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">, - AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">; + : Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">, + AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">; def HasRCPC : Predicate<"Subtarget->hasRCPC()">, AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">; def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, - AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">; + AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">; def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">, - AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">; + AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">; def HasSB : Predicate<"Subtarget->hasSB()">, - AssemblerPredicateWithAll<(all_of FeatureSB), "sb">; -def HasPredRes : Predicate<"Subtarget->hasPredRes()">, - AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">; + AssemblerPredicateWithAll<(all_of FeatureSB), "sb">; +def HasPredRes : 
Predicate<"Subtarget->hasPredRes()">, + AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">; def HasCCDP : Predicate<"Subtarget->hasCCDP()">, - AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">; + AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">; def HasBTI : Predicate<"Subtarget->hasBTI()">, - AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">; + AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">; def HasMTE : Predicate<"Subtarget->hasMTE()">, - AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">; + AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">; def HasTME : Predicate<"Subtarget->hasTME()">, - AssemblerPredicateWithAll<(all_of FeatureTME), "tme">; + AssemblerPredicateWithAll<(all_of FeatureTME), "tme">; def HasETE : Predicate<"Subtarget->hasETE()">, - AssemblerPredicateWithAll<(all_of FeatureETE), "ete">; + AssemblerPredicateWithAll<(all_of FeatureETE), "ete">; def HasTRBE : Predicate<"Subtarget->hasTRBE()">, - AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">; + AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">; def HasBF16 : Predicate<"Subtarget->hasBF16()">, - AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">; + AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">; def HasNoBF16 : Predicate<"!Subtarget->hasBF16()">; def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">, - AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">; + AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">; def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">, - AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">; + AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">; def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">, - AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">; + AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">; def HasXS : Predicate<"Subtarget->hasXS()">, - 
AssemblerPredicateWithAll<(all_of FeatureXS), "xs">; + AssemblerPredicateWithAll<(all_of FeatureXS), "xs">; def HasWFxT : Predicate<"Subtarget->hasWFxT()">, - AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">; + AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">; def HasLS64 : Predicate<"Subtarget->hasLS64()">, - AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">; + AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">; def HasBRBE : Predicate<"Subtarget->hasBRBE()">, - AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">; + AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">; def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">, - AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">; + AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">; def HasHBC : Predicate<"Subtarget->hasHBC()">, - AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">; + AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">; def HasMOPS : Predicate<"Subtarget->hasMOPS()">, - AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">; + AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">; def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">, - AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">; + AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">; def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">, - AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">; + AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">; def HasITE : Predicate<"Subtarget->hasITE()">, - AssemblerPredicateWithAll<(all_of FeatureITE), "ite">; + AssemblerPredicateWithAll<(all_of FeatureITE), "ite">; def HasTHE : Predicate<"Subtarget->hasTHE()">, - AssemblerPredicateWithAll<(all_of FeatureTHE), "the">; + AssemblerPredicateWithAll<(all_of FeatureTHE), "the">; def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">, - AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">; + AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">; def 
HasLSE128 : Predicate<"Subtarget->hasLSE128()">, - AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">; + AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">; def HasD128 : Predicate<"Subtarget->hasD128()">, - AssemblerPredicateWithAll<(all_of FeatureD128), "d128">; + AssemblerPredicateWithAll<(all_of FeatureD128), "d128">; def HasCHK : Predicate<"Subtarget->hasCHK()">, - AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">; + AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">; def HasGCS : Predicate<"Subtarget->hasGCS()">, - AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">; + AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">; def HasCPA : Predicate<"Subtarget->hasCPA()">, - AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">; + AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">; +def HasTLBID : Predicate<"Subtarget->hasTLBID()">, + AssemblerPredicateWithAll<(all_of FeatureTLBID), "tlbid">; +def HasMPAMv2 : Predicate<"Subtarget->hasMPAMv2()">, + AssemblerPredicateWithAll<(all_of FeatureMPAMv2), "mpamv2">; +def HasMTETC : Predicate<"Subtarget->hasMTETC()">, + AssemblerPredicateWithAll<(all_of FeatureMTETC), "mtetc">; +def HasGCIE : Predicate<"Subtarget->hasGCIE()">, + AssemblerPredicateWithAll<(all_of FeatureGCIE), "gcie">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; def UseExperimentalZeroingPseudos - : Predicate<"Subtarget->useExperimentalZeroingPseudos()">; + : Predicate<"Subtarget->useExperimentalZeroingPseudos()">; def UseAlternateSExtLoadCVTF32 - : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">; + : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">; def UseNegativeImmediates - : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)), - "NegativeImmediates">; + : Predicate<"false">, + AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)), + 
"NegativeImmediates">; -def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">; +def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">; def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">; -def HasFastIncVL : Predicate<"!Subtarget->hasDisableFastIncVL()">; +def HasFastIncVL : Predicate<"!Subtarget->hasDisableFastIncVL()">; -def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">; +def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">; -def UseLDAPUR : Predicate<"!Subtarget->avoidLDAPUR()">; +def UseLDAPUR : Predicate<"!Subtarget->avoidLDAPUR()">; def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER", SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisInt<1>]>>; -def AllowMisalignedMemAccesses : Predicate<"!Subtarget->requiresStrictAlign()">; +def AllowMisalignedMemAccesses + : Predicate<"!Subtarget->requiresStrictAlign()">; def UseWzrToVecMove : Predicate<"Subtarget->useWzrToVecMove()">; @@ -3692,6 +3704,12 @@ def UDF : UDFType<0, "udf">; // Load instructions. 
//===----------------------------------------------------------------------===// +let Predicates = [HasLSCP] in { +defm LDAP : LoadAcquirePairOffset<0b0101, "ldap">; +defm LDAPP : LoadAcquirePairOffset<0b0111, "ldapp">; +defm STLP : StoreAcquirePairOffset<0b0101, "stlp">; +} + // Pair (indexed, offset) defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">; defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">; @@ -4004,26 +4022,6 @@ defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; -// load zero-extended i32, bitcast to f64 -def : Pat <(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), - (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; - -// load zero-extended i16, bitcast to f64 -def : Pat <(f64 (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))), - (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; - -// load zero-extended i8, bitcast to f64 -def : Pat <(f64 (bitconvert (i64 (zextloadi8 (am_indexed32 GPR64sp:$Rn, uimm12s1:$offset))))), - (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; - -// load zero-extended i16, bitcast to f32 -def : Pat <(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), - (SUBREG_TO_REG (i32 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; - -// load zero-extended i8, bitcast to f32 -def : Pat <(f32 (bitconvert (i32 (zextloadi8 (am_indexed16 GPR64sp:$Rn, uimm12s1:$offset))))), - (SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; - // Pre-fetch. 
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", [(AArch64Prefetch timm:$Rt, @@ -4375,6 +4373,64 @@ def : Pat <(v1i64 (scalar_to_vector (i64 (load (ro64.Xpat GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend))))), (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend)>; +// Patterns for bitconvert or scalar_to_vector of load operations. +// Enables direct SIMD register loads for small integer types (i8/i16) that are +// naturally zero-extended to i32/i64. +multiclass ExtLoad8_16AllModes<ValueType OutTy, ValueType InnerTy, + SDPatternOperator OuterOp, + PatFrags LoadOp8, PatFrags LoadOp16> { + // 8-bit loads. + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), + (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), + (SUBREG_TO_REG (i64 0), (LDURBi GPR64sp:$Rn, simm9:$offset), bsub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$extend))))), + (SUBREG_TO_REG (i64 0), (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$extend), bsub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$extend))))), + (SUBREG_TO_REG (i64 0), (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$extend), bsub)>; + + // 16-bit loads. 
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), + (SUBREG_TO_REG (i64 0), (LDURHi GPR64sp:$Rn, simm9:$offset), hsub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$extend))))), + (SUBREG_TO_REG (i64 0), (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$extend), hsub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$extend))))), + (SUBREG_TO_REG (i64 0), (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$extend), hsub)>; +} + +// Extended multiclass that includes 32-bit loads in addition to 8-bit and 16-bit. +multiclass ExtLoad8_16_32AllModes<ValueType OutTy, ValueType InnerTy, + SDPatternOperator OuterOp, + PatFrags LoadOp8, PatFrags LoadOp16, PatFrags LoadOp32> { + defm : ExtLoad8_16AllModes<OutTy, InnerTy, OuterOp, LoadOp8, LoadOp16>; + + // 32-bit loads. + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), + (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))), + (SUBREG_TO_REG (i64 0), (LDURSi GPR64sp:$Rn, simm9:$offset), ssub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$extend))))), + (SUBREG_TO_REG (i64 0), (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$extend), ssub)>; + def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$extend))))), + (SUBREG_TO_REG (i64 0), (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$extend), ssub)>; +} + +// Instantiate bitconvert patterns for floating-point types. 
+defm : ExtLoad8_16AllModes<f32, i32, bitconvert, zextloadi8, zextloadi16>; +defm : ExtLoad8_16_32AllModes<f64, i64, bitconvert, zextloadi8, zextloadi16, zextloadi32>; + +// Instantiate scalar_to_vector patterns for all vector types. +defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, zextloadi8, zextloadi16>; +defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, extloadi8, extloadi16>; +defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, zextloadi8, zextloadi16>; +defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, extloadi8, extloadi16>; +defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, zextloadi8, zextloadi16>; +defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, extloadi8, extloadi16>; +defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, zextloadi8, zextloadi16, zextloadi32>; +defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, extloadi8, extloadi16, extloadi32>; + // Pre-fetch. defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", [(AArch64Prefetch timm:$Rt, @@ -5239,113 +5295,10 @@ let Predicates = [HasNEON, HasFPRCVT] in{ defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>; defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>; defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>; - defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">; - defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">; -} - - -// AArch64's FCVT instructions saturate when out of range. 
-multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> { - let Predicates = [HasFullFP16] in { - def : Pat<(i32 (to_int_sat f16:$Rn, i32)), - (!cast<Instruction>(INST # UWHr) f16:$Rn)>; - def : Pat<(i64 (to_int_sat f16:$Rn, i64)), - (!cast<Instruction>(INST # UXHr) f16:$Rn)>; - } - def : Pat<(i32 (to_int_sat f32:$Rn, i32)), - (!cast<Instruction>(INST # UWSr) f32:$Rn)>; - def : Pat<(i64 (to_int_sat f32:$Rn, i64)), - (!cast<Instruction>(INST # UXSr) f32:$Rn)>; - def : Pat<(i32 (to_int_sat f64:$Rn, i32)), - (!cast<Instruction>(INST # UWDr) f64:$Rn)>; - def : Pat<(i64 (to_int_sat f64:$Rn, i64)), - (!cast<Instruction>(INST # UXDr) f64:$Rn)>; - - let Predicates = [HasFullFP16] in { - def : Pat<(i32 (to_int_sat_gi f16:$Rn)), - (!cast<Instruction>(INST # UWHr) f16:$Rn)>; - def : Pat<(i64 (to_int_sat_gi f16:$Rn)), - (!cast<Instruction>(INST # UXHr) f16:$Rn)>; - } - def : Pat<(i32 (to_int_sat_gi f32:$Rn)), - (!cast<Instruction>(INST # UWSr) f32:$Rn)>; - def : Pat<(i64 (to_int_sat_gi f32:$Rn)), - (!cast<Instruction>(INST # UXSr) f32:$Rn)>; - def : Pat<(i32 (to_int_sat_gi f64:$Rn)), - (!cast<Instruction>(INST # UWDr) f64:$Rn)>; - def : Pat<(i64 (to_int_sat_gi f64:$Rn)), - (!cast<Instruction>(INST # UXDr) f64:$Rn)>; - - let Predicates = [HasFullFP16] in { - def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)), - (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; - def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)), - (!cast<Instruction>(INST # SXHri) $Rn, $scale)>; - } - def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)), - (!cast<Instruction>(INST # SWSri) $Rn, $scale)>; - def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)), - (!cast<Instruction>(INST # SXSri) $Rn, $scale)>; - def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)), - (!cast<Instruction>(INST # SWDri) $Rn, $scale)>; - def : Pat<(i64 (to_int_sat (fmul f64:$Rn, 
fixedpoint_f64_i64:$scale), i64)), - (!cast<Instruction>(INST # SXDri) $Rn, $scale)>; - - let Predicates = [HasFullFP16] in { - def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))), - (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; - def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))), - (!cast<Instruction>(INST # SXHri) $Rn, $scale)>; - } - def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))), - (!cast<Instruction>(INST # SWSri) $Rn, $scale)>; - def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))), - (!cast<Instruction>(INST # SXSri) $Rn, $scale)>; - def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))), - (!cast<Instruction>(INST # SWDri) $Rn, $scale)>; - def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))), - (!cast<Instruction>(INST # SXDri) $Rn, $scale)>; + defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs", any_fp_to_sint>; + defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu", any_fp_to_uint>; } -defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">; -defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">; - -multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> { - def : Pat<(i32 (to_int (round f32:$Rn))), - (!cast<Instruction>(INST # UWSr) f32:$Rn)>; - def : Pat<(i64 (to_int (round f32:$Rn))), - (!cast<Instruction>(INST # UXSr) f32:$Rn)>; - def : Pat<(i32 (to_int (round f64:$Rn))), - (!cast<Instruction>(INST # UWDr) f64:$Rn)>; - def : Pat<(i64 (to_int (round f64:$Rn))), - (!cast<Instruction>(INST # UXDr) f64:$Rn)>; - - // These instructions saturate like fp_to_[su]int_sat. 
- let Predicates = [HasFullFP16] in { - def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)), - (!cast<Instruction>(INST # UWHr) f16:$Rn)>; - def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)), - (!cast<Instruction>(INST # UXHr) f16:$Rn)>; - } - def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)), - (!cast<Instruction>(INST # UWSr) f32:$Rn)>; - def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)), - (!cast<Instruction>(INST # UXSr) f32:$Rn)>; - def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)), - (!cast<Instruction>(INST # UWDr) f64:$Rn)>; - def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)), - (!cast<Instruction>(INST # UXDr) f64:$Rn)>; -} - -defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">; -defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">; -defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">; -defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">; -defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">; -defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">; -defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">; -defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">; - let Predicates = [HasFullFP16] in { @@ -6553,8 +6506,8 @@ defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtn defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>; defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>; def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; -defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">; -defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">; +defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs", any_fp_to_sint>; +defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu", any_fp_to_uint>; defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">; defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">; defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, 
"frsqrte">; @@ -6574,6 +6527,7 @@ defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", // Floating-point conversion patterns. multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> { + let Predicates = [HasFPRCVT] in { def : Pat<(f32 (bitconvert (i32 (OpN (f64 FPR64:$Rn))))), (!cast<Instruction>(INST # SDr) FPR64:$Rn)>; def : Pat<(f32 (bitconvert (i32 (OpN (f16 FPR16:$Rn))))), @@ -6582,6 +6536,7 @@ multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> { (!cast<Instruction>(INST # DHr) FPR16:$Rn)>; def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))), (!cast<Instruction>(INST # DSr) FPR32:$Rn)>; + } def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))), (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>; def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))), @@ -6596,6 +6551,8 @@ defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">; defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">; defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">; defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">; +defm: FPToIntegerSIMDScalarPatterns<any_fp_to_sint, "FCVTZS">; +defm: FPToIntegerSIMDScalarPatterns<any_fp_to_uint, "FCVTZU">; multiclass FPToIntegerIntPats<Intrinsic round, string INST> { let Predicates = [HasFullFP16] in { @@ -6652,6 +6609,196 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> { defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">; defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">; +// AArch64's FCVT instructions saturate when out of range. 
+multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> { + let Predicates = [HasFullFP16] in { + def : Pat<(i32 (to_int_sat f16:$Rn, i32)), + (!cast<Instruction>(INST # UWHr) f16:$Rn)>; + def : Pat<(i64 (to_int_sat f16:$Rn, i64)), + (!cast<Instruction>(INST # UXHr) f16:$Rn)>; + } + def : Pat<(i32 (to_int_sat f32:$Rn, i32)), + (!cast<Instruction>(INST # UWSr) f32:$Rn)>; + def : Pat<(i64 (to_int_sat f32:$Rn, i64)), + (!cast<Instruction>(INST # UXSr) f32:$Rn)>; + def : Pat<(i32 (to_int_sat f64:$Rn, i32)), + (!cast<Instruction>(INST # UWDr) f64:$Rn)>; + def : Pat<(i64 (to_int_sat f64:$Rn, i64)), + (!cast<Instruction>(INST # UXDr) f64:$Rn)>; + + let Predicates = [HasFullFP16] in { + def : Pat<(i32 (to_int_sat_gi f16:$Rn)), + (!cast<Instruction>(INST # UWHr) f16:$Rn)>; + def : Pat<(i64 (to_int_sat_gi f16:$Rn)), + (!cast<Instruction>(INST # UXHr) f16:$Rn)>; + } + def : Pat<(i32 (to_int_sat_gi f32:$Rn)), + (!cast<Instruction>(INST # UWSr) f32:$Rn)>; + def : Pat<(i64 (to_int_sat_gi f32:$Rn)), + (!cast<Instruction>(INST # UXSr) f32:$Rn)>; + def : Pat<(i32 (to_int_sat_gi f64:$Rn)), + (!cast<Instruction>(INST # UWDr) f64:$Rn)>; + def : Pat<(i64 (to_int_sat_gi f64:$Rn)), + (!cast<Instruction>(INST # UXDr) f64:$Rn)>; + + // For global-isel we can use register classes to determine + // which FCVT instruction to use. 
+ let Predicates = [HasFPRCVT] in { + def : Pat<(i32 (to_int_sat_gi f16:$Rn)), + (!cast<Instruction>(INST # SHr) f16:$Rn)>; + def : Pat<(i64 (to_int_sat_gi f16:$Rn)), + (!cast<Instruction>(INST # DHr) f16:$Rn)>; + def : Pat<(i64 (to_int_sat_gi f32:$Rn)), + (!cast<Instruction>(INST # DSr) f32:$Rn)>; + def : Pat<(i32 (to_int_sat_gi f64:$Rn)), + (!cast<Instruction>(INST # SDr) f64:$Rn)>; + } + def : Pat<(i32 (to_int_sat_gi f32:$Rn)), + (!cast<Instruction>(INST # v1i32) f32:$Rn)>; + def : Pat<(i64 (to_int_sat_gi f64:$Rn)), + (!cast<Instruction>(INST # v1i64) f64:$Rn)>; + + let Predicates = [HasFPRCVT] in { + def : Pat<(f32 (bitconvert (i32 (to_int_sat f16:$Rn, i32)))), + (!cast<Instruction>(INST # SHr) f16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat f16:$Rn, i64)))), + (!cast<Instruction>(INST # DHr) f16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat f32:$Rn, i64)))), + (!cast<Instruction>(INST # DSr) f32:$Rn)>; + def : Pat<(f32 (bitconvert (i32 (to_int_sat f64:$Rn, i32)))), + (!cast<Instruction>(INST # SDr) f64:$Rn)>; + } + def : Pat<(f32 (bitconvert (i32 (to_int_sat f32:$Rn, i32)))), + (!cast<Instruction>(INST # v1i32) f32:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat f64:$Rn, i64)))), + (!cast<Instruction>(INST # v1i64) f64:$Rn)>; + + let Predicates = [HasFullFP16] in { + def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)), + (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; + def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)), + (!cast<Instruction>(INST # SXHri) $Rn, $scale)>; + } + def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)), + (!cast<Instruction>(INST # SWSri) $Rn, $scale)>; + def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)), + (!cast<Instruction>(INST # SXSri) $Rn, $scale)>; + def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)), + (!cast<Instruction>(INST # SWDri) $Rn, $scale)>; + def : Pat<(i64 (to_int_sat 
(fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)), + (!cast<Instruction>(INST # SXDri) $Rn, $scale)>; + + let Predicates = [HasFullFP16] in { + def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))), + (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; + def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))), + (!cast<Instruction>(INST # SXHri) $Rn, $scale)>; + } + def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))), + (!cast<Instruction>(INST # SWSri) $Rn, $scale)>; + def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))), + (!cast<Instruction>(INST # SXSri) $Rn, $scale)>; + def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))), + (!cast<Instruction>(INST # SWDri) $Rn, $scale)>; + def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))), + (!cast<Instruction>(INST # SXDri) $Rn, $scale)>; +} + +defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">; +defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">; + +multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode to_int_sat_gi, SDNode round, string INST> { + def : Pat<(i32 (to_int (round f32:$Rn))), + (!cast<Instruction>(INST # UWSr) f32:$Rn)>; + def : Pat<(i64 (to_int (round f32:$Rn))), + (!cast<Instruction>(INST # UXSr) f32:$Rn)>; + def : Pat<(i32 (to_int (round f64:$Rn))), + (!cast<Instruction>(INST # UWDr) f64:$Rn)>; + def : Pat<(i64 (to_int (round f64:$Rn))), + (!cast<Instruction>(INST # UXDr) f64:$Rn)>; + + // For global-isel we can use register classes to determine + // which FCVT instruction to use. 
+ let Predicates = [HasFPRCVT] in { + def : Pat<(i64 (to_int (round f32:$Rn))), + (!cast<Instruction>(INST # DSr) f32:$Rn)>; + def : Pat<(i32 (to_int (round f64:$Rn))), + (!cast<Instruction>(INST # SDr) f64:$Rn)>; + } + def : Pat<(i32 (to_int (round f32:$Rn))), + (!cast<Instruction>(INST # v1i32) f32:$Rn)>; + def : Pat<(i64 (to_int (round f64:$Rn))), + (!cast<Instruction>(INST # v1i64) f64:$Rn)>; + + let Predicates = [HasFPRCVT] in { + def : Pat<(f64 (bitconvert (i64 (to_int (round f32:$Rn))))), + (!cast<Instruction>(INST # DSr) f32:$Rn)>; + def : Pat<(f32 (bitconvert (i32 (to_int (round f64:$Rn))))), + (!cast<Instruction>(INST # SDr) f64:$Rn)>; + } + def : Pat<(f32 (bitconvert (i32 (to_int (round f32:$Rn))))), + (!cast<Instruction>(INST # v1i32) f32:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int (round f64:$Rn))))), + (!cast<Instruction>(INST # v1i64) f64:$Rn)>; + + // These instructions saturate like fp_to_[su]int_sat. + let Predicates = [HasFullFP16] in { + def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)), + (!cast<Instruction>(INST # UWHr) f16:$Rn)>; + def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)), + (!cast<Instruction>(INST # UXHr) f16:$Rn)>; + } + def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)), + (!cast<Instruction>(INST # UWSr) f32:$Rn)>; + def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)), + (!cast<Instruction>(INST # UXSr) f32:$Rn)>; + def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)), + (!cast<Instruction>(INST # UWDr) f64:$Rn)>; + def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)), + (!cast<Instruction>(INST # UXDr) f64:$Rn)>; + + // For global-isel we can use register classes to determine + // which FCVT instruction to use. 
+ let Predicates = [HasFPRCVT] in { + def : Pat<(i32 (to_int_sat_gi (round f16:$Rn))), + (!cast<Instruction>(INST # SHr) f16:$Rn)>; + def : Pat<(i64 (to_int_sat_gi (round f16:$Rn))), + (!cast<Instruction>(INST # DHr) f16:$Rn)>; + def : Pat<(i64 (to_int_sat_gi (round f32:$Rn))), + (!cast<Instruction>(INST # DSr) f32:$Rn)>; + def : Pat<(i32 (to_int_sat_gi (round f64:$Rn))), + (!cast<Instruction>(INST # SDr) f64:$Rn)>; + } + def : Pat<(i32 (to_int_sat_gi (round f32:$Rn))), + (!cast<Instruction>(INST # v1i32) f32:$Rn)>; + def : Pat<(i64 (to_int_sat_gi (round f64:$Rn))), + (!cast<Instruction>(INST # v1i64) f64:$Rn)>; + + let Predicates = [HasFPRCVT] in { + def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f16:$Rn), i32)))), + (!cast<Instruction>(INST # SHr) f16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f16:$Rn), i64)))), + (!cast<Instruction>(INST # DHr) f16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f32:$Rn), i64)))), + (!cast<Instruction>(INST # DSr) f32:$Rn)>; + def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f64:$Rn), i32)))), + (!cast<Instruction>(INST # SDr) f64:$Rn)>; + } + def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f32:$Rn), i32)))), + (!cast<Instruction>(INST # v1i32) f32:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f64:$Rn), i64)))), + (!cast<Instruction>(INST # v1i64) f64:$Rn)>; +} + +defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fceil, "FCVTPS">; +defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fceil, "FCVTPU">; +defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ffloor, "FCVTMS">; +defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ffloor, "FCVTMU">; +defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ftrunc, "FCVTZS">; +defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ftrunc, "FCVTZU">; +defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fround, 
"FCVTAS">; +defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fround, "FCVTAU">; + // f16 -> s16 conversions let Predicates = [HasFullFP16] in { def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>; @@ -11248,8 +11395,28 @@ let Predicates = [HasLSFE] in { def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">; } +let Predicates = [HasF16F32DOT] in { + defm FDOT :SIMDThreeSameVectorFDot<"fdot">; + defm FDOTlane: SIMDThreeSameVectorFDOTIndex<"fdot">; +} + +let Predicates = [HasF16MM] in + defm FMMLA : SIMDThreeSameVectorFMLA<"fmmla">; + +let Predicates = [HasF16F32MM] in + defm FMMLA : SIMDThreeSameVectorFMLAWiden<"fmmla">; + let Uses = [FPMR, FPCR] in -defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">; + defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">; + +//===----------------------------------------------------------------------===// +// Contention Management Hints (FEAT_CMH) +//===----------------------------------------------------------------------===// + +let Predicates = [HasCMH] in { + defm SHUH : SHUH<"shuh">; // Shared Update Hint instruction + def STCPH : STCPHInst<"stcph">; // Store Concurrent Priority Hint instruction +} include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 47144c7..cd94a25 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -1341,6 +1341,10 @@ def Z_q : RegisterOperand<ZPR, "printTypedVectorList<0,'q'>"> { let ParserMatchClass = ZPRVectorList<128, 1>; } +def ZZ_Any : RegisterOperand<ZPR2, "printTypedVectorList<0,0>"> { + let ParserMatchClass = ZPRVectorList<0, 2>; +} + def ZZ_b : RegisterOperand<ZPR2, "printTypedVectorList<0,'b'>"> { let ParserMatchClass = ZPRVectorList<8, 2>; } @@ -1361,6 +1365,10 @@ def ZZ_q : RegisterOperand<ZPR2, "printTypedVectorList<0,'q'>"> { let ParserMatchClass 
= ZPRVectorList<128, 2>; } +def ZZZ_Any : RegisterOperand<ZPR3, "printTypedVectorList<0,0>"> { + let ParserMatchClass = ZPRVectorList<0, 3>; +} + def ZZZ_b : RegisterOperand<ZPR3, "printTypedVectorList<0,'b'>"> { let ParserMatchClass = ZPRVectorList<8, 3>; } diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index e552afe..752b185 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1173,3 +1173,14 @@ let Predicates = [HasSME_MOP4, HasSMEF64F64] in { defm FMOP4A : sme2_fmop4as_fp64_non_widening<0, "fmop4a", "int_aarch64_sme_mop4a">; defm FMOP4S : sme2_fmop4as_fp64_non_widening<1, "fmop4s", "int_aarch64_sme_mop4s">; } + +//===----------------------------------------------------------------------===// +// SME2.3 instructions +//===----------------------------------------------------------------------===// +let Predicates = [HasSME2p3] in { + def LUTI6_ZTZ : sme2_lut_single<"luti6">; + def LUTI6_4ZT3Z : sme2_luti6_zt_consecutive<"luti6">; + def LUTI6_S_4ZT3Z : sme2_luti6_zt_strided<"luti6">; + def LUTI6_4Z2Z2ZI : sme2_luti6_vector_vg4_consecutive<"luti6">; + def LUTI6_S_4Z2Z2ZI : sme2_luti6_vector_vg4_strided<"luti6">; +} // [HasSME2p3] diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 98a128e..3b268dc 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2569,7 +2569,7 @@ let Predicates = [HasBF16, HasSVE_or_SME] in { } // End HasBF16, HasSVE_or_SME let Predicates = [HasBF16, HasSVE] in { - defm BFMMLA_ZZZ_HtoS : sve_fp_matrix_mla<0b01, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>; + defm BFMMLA_ZZZ_HtoS : sve_fp_matrix_mla<0b011, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>; } // End HasBF16, HasSVE let Predicates = [HasBF16, HasSVE_or_SME] in { @@ -3680,15 +3680,15 @@ let 
Predicates = [HasSVE_or_SME, HasMatMulInt8] in { } // End HasSVE_or_SME, HasMatMulInt8 let Predicates = [HasSVE, HasMatMulFP32] in { - defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0b10, "fmmla", ZPR32, ZPR32, int_aarch64_sve_fmmla, nxv4f32, nxv4f32>; + defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0b101, "fmmla", ZPR32, ZPR32, int_aarch64_sve_fmmla, nxv4f32, nxv4f32>; } // End HasSVE, HasMatMulFP32 let Predicates = [HasSVE_F16F32MM] in { - def FMLLA_ZZZ_HtoS : sve_fp_matrix_mla<0b00, "fmmla", ZPR32, ZPR16>; + def FMLLA_ZZZ_HtoS : sve_fp_matrix_mla<0b001, "fmmla", ZPR32, ZPR16>; } // End HasSVE_F16F32MM let Predicates = [HasSVE, HasMatMulFP64] in { - defm FMMLA_ZZZ_D : sve_fp_matrix_mla<0b11, "fmmla", ZPR64, ZPR64, int_aarch64_sve_fmmla, nxv2f64, nxv2f64>; + defm FMMLA_ZZZ_D : sve_fp_matrix_mla<0b111, "fmmla", ZPR64, ZPR64, int_aarch64_sve_fmmla, nxv2f64, nxv2f64>; defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8, nxv16i8, nxv16i1, AArch64ld1ro_z>; defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro_z>; defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro_z>; @@ -4272,9 +4272,9 @@ def : Pat<(nxv4i32 (partial_reduce_smla nxv4i32:$Acc, nxv8i16:$MulLHS, nxv8i16:$ defm SQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtn", 0b00, int_aarch64_sve_sqcvtn_x2>; defm UQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"uqcvtn", 0b01, int_aarch64_sve_uqcvtn_x2>; defm SQCVTUN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtun", 0b10, int_aarch64_sve_sqcvtun_x2>; -defm SQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>; -defm UQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"uqrshrn", 0b111, int_aarch64_sve_uqrshrn_x2>; -defm SQRSHRUN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrun", 0b001, int_aarch64_sve_sqrshrun_x2>; +defm SQRSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>; +defm UQRSHRN_Z2ZI_StoH : 
sve_multi_vec_shift_narrow<"uqrshrn", 0b111, int_aarch64_sve_uqrshrn_x2>; +defm SQRSHRUN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqrshrun", 0b001, int_aarch64_sve_sqrshrun_x2>; defm WHILEGE_2PXX : sve2p1_int_while_rr_pair<"whilege", 0b000>; defm WHILEGT_2PXX : sve2p1_int_while_rr_pair<"whilegt", 0b001>; @@ -4615,6 +4615,75 @@ let Predicates = [HasSVE2p2_or_SME2p2] in { defm REVD_ZPzZ : sve_int_perm_rev_revd_z<"revd", AArch64revd_mt>; } // End HasSME2p2orSVE2p2 + +//===----------------------------------------------------------------------===// +// SME2.3 or SVE2.3 instructions +//===----------------------------------------------------------------------===// +let Predicates = [HasSVE2p3_or_SME2p3] in { + // SVE2 Add pairwise within quadword vector segments (unpredicated) + defm ADDQP_ZZZ : sve2_int_mul<0b110, "addqp", null_frag>; + + // SVE2 Add subtract/subtract pairwise + defm ADDSUBP_ZZZ : sve2_int_mul<0b111, "addsubp", null_frag>; + defm SUBP_ZPmZZ : sve2_int_arith_pred<0b100001, "subp", null_frag>; + + // SVE2 integer absolute difference and accumulate long + defm SABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b0, "sabal">; + defm UABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b1, "uabal">; + + // SVE2 integer dot product + def SDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b0, "sdot", ZPR16, ZPR8>; + def UDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b1, "udot", ZPR16, ZPR8>; + + // SVE2 integer indexed dot product + def SDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b0, "sdot">; + def UDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b1, "udot">; + + // SVE2 fp convert, narrow and interleave to integer, rounding toward zero + defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0>; + defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1>; + + // SVE2 signed/unsigned integer convert to floating-point + defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00>; + defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10>; + defm UCVTF_ZZ : 
sve2_int_to_fp_upcvt<"ucvtf", 0b01>; + defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11>; + + // SVE2 saturating shift right narrow by immediate and interleave + defm SQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrn", 0b101>; + defm SQRSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrun", 0b001>; + defm SQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrn", 0b000>; + defm SQSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrun", 0b100>; + defm UQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqrshrn", 0b111>; + defm UQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqshrn", 0b010>; + defm SQSHRUN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrun", 0b100, null_frag>; + defm SQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrn", 0b000, null_frag>; + defm UQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"uqshrn", 0b010, null_frag>; + + defm LUTI6_Z2ZZI : sve2_luti6_vector_index<"luti6">; +} // End HasSME2p3orSVE2p3 + +//===----------------------------------------------------------------------===// +// SVE2.3 instructions +//===----------------------------------------------------------------------===// +let Predicates = [HasSVE2p3] in { + def LUTI6_Z2ZZ : sve2_luti6_vector<"luti6">; +} + +//===----------------------------------------------------------------------===// +// SVE_B16MM Instructions +//===----------------------------------------------------------------------===// +let Predicates = [HasSVE_B16MM] in { + def BFMMLA_ZZZ_H : sve_fp_matrix_mla<0b110, "bfmmla", ZPR16, ZPR16>; +} + +//===----------------------------------------------------------------------===// +// F16MM Instructions +//===----------------------------------------------------------------------===// +let Predicates = [HasSVE2p2, HasF16MM] in { + def FMMLA_ZZZ_H : sve_fp_matrix_mla<0b100, "fmmla", ZPR16, ZPR16>; +} + //===----------------------------------------------------------------------===// // SME2.2 or SVE2.2 instructions - Legal in streaming mode 
iff target has SME2p2 //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td index bdde8e3..2387f17 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -2762,11 +2762,11 @@ def : InstRW<[V2Write_11c_18L01_18V01], (instregex "^ST4[BHWD]_IMM$")>; def : InstRW<[V2Write_11c_18L01_18S_18V01], (instregex "^ST4[BHWD]$")>; // Non temporal store, scalar + imm -def : InstRW<[V2Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>; +def : InstRW<[V2Write_2c_1L01_1V01], (instregex "^STNT1[BHWD]_ZRI$")>; // Non temporal store, scalar + scalar -def : InstRW<[V2Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>; -def : InstRW<[V2Write_2c_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>; +def : InstRW<[V2Write_2c_1L01_1S_1V01], (instrs STNT1H_ZRR)>; +def : InstRW<[V2Write_2c_1L01_1V01], (instregex "^STNT1[BWD]_ZRR$")>; // Scatter non temporal store, vector + scalar 32-bit element size def : InstRW<[V2Write_4c_4L01_4V01], (instregex "^STNT1[BHW]_ZZR_S")>; diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td index 9438917..ae46d71 100644 --- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td +++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td @@ -205,6 +205,7 @@ def lookupDCByName : SearchIndex { let Key = ["Name"]; } +// Op1 CRn CRm Op2 def : DC<"ZVA", 0b011, 0b0111, 0b0100, 0b001>; def : DC<"IVAC", 0b000, 0b0111, 0b0110, 0b001>; def : DC<"ISW", 0b000, 0b0111, 0b0110, 0b010>; @@ -241,6 +242,11 @@ def : DC<"CIGDVAC", 0b011, 0b0111, 0b1110, 0b101>; def : DC<"GZVA", 0b011, 0b0111, 0b0100, 0b100>; } +let Requires = [{ {AArch64::FeatureMTETC} }] in { +def : DC<"ZGBVA", 0b011, 0b0111, 0b0100, 0b101>; +def : DC<"GBVA", 0b011, 0b0111, 0b0100, 0b111>; +} + let Requires = [{ {AArch64::FeatureMEC} }] in { def : DC<"CIPAE", 0b100, 0b0111, 
0b1110, 0b000>; def : DC<"CIGDPAE", 0b100, 0b0111, 0b1110, 0b111>; @@ -813,11 +819,26 @@ def : BTI<"j", 0b100>; def : BTI<"jc", 0b110>; //===----------------------------------------------------------------------===// +// CMHPriority instruction options. +//===----------------------------------------------------------------------===// + +class CMHPriorityHint<string name, bits<1> encoding> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<1> Encoding; + let Encoding = encoding; +} + +def : CMHPriorityHint<"ph", 0b1>; + +//===----------------------------------------------------------------------===// // TLBI (translation lookaside buffer invalidate) instruction options. //===----------------------------------------------------------------------===// class TLBICommon<string name, bits<3> op1, bits<4> crn, bits<4> crm, - bits<3> op2, bit needsreg> { + bits<3> op2, bit needsreg, bit optionalreg> { string Name = name; bits<14> Encoding; let Encoding{13-11} = op1; @@ -825,24 +846,25 @@ class TLBICommon<string name, bits<3> op1, bits<4> crn, bits<4> crm, let Encoding{6-3} = crm; let Encoding{2-0} = op2; bit NeedsReg = needsreg; + bit OptionalReg = optionalreg; list<string> Requires = []; list<string> ExtraRequires = []; code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }]; } class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, - bits<3> op2, bit needsreg> - : TLBICommon<name, op1, crn, crm, op2, needsreg>; + bits<3> op2, bit needsreg, bit optionalreg> + : TLBICommon<name, op1, crn, crm, op2, needsreg, optionalreg>; class TLBIPEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, - bits<3> op2, bit needsreg> - : TLBICommon<name, op1, crn, crm, op2, needsreg>; + bits<3> op2, bit needsreg, bit optionalreg> + : TLBICommon<name, op1, crn, crm, op2, needsreg, optionalreg>; multiclass TLBITableBase { def NAME # Table : GenericTable { let 
FilterClass = NAME # "Entry"; let CppTypeName = NAME; - let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"]; + let Fields = ["Name", "Encoding", "NeedsReg", "OptionalReg", "RequiresStr"]; let PrimaryKey = ["Encoding"]; let PrimaryKeyName = "lookup" # NAME # "ByEncoding"; } @@ -856,60 +878,60 @@ defm TLBI : TLBITableBase; defm TLBIP : TLBITableBase; multiclass TLBI<string name, bit hasTLBIP, bits<3> op1, bits<4> crn, bits<4> crm, - bits<3> op2, bit needsreg = 1> { - def : TLBIEntry<name, op1, crn, crm, op2, needsreg>; - def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> { + bits<3> op2, bit needsreg = 1, bit optionalreg = 0> { + def : TLBIEntry<name, op1, crn, crm, op2, needsreg, optionalreg>; + def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg, optionalreg> { let Encoding{7} = 1; let ExtraRequires = ["AArch64::FeatureXS"]; } if !eq(hasTLBIP, true) then { - def : TLBIPEntry<name, op1, crn, crm, op2, needsreg>; - def : TLBIPEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> { + def : TLBIPEntry<name, op1, crn, crm, op2, needsreg, optionalreg>; + def : TLBIPEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg, optionalreg> { let Encoding{7} = 1; let ExtraRequires = ["AArch64::FeatureXS"]; } } } -// hasTLBIP op1 CRn CRm op2 needsreg +// hasTLBIP op1 CRn CRm op2 needsreg, optreg defm : TLBI<"IPAS2E1IS", 1, 0b100, 0b1000, 0b0000, 0b001>; defm : TLBI<"IPAS2LE1IS", 1, 0b100, 0b1000, 0b0000, 0b101>; -defm : TLBI<"VMALLE1IS", 0, 0b000, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"ALLE2IS", 0, 0b100, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"ALLE3IS", 0, 0b110, 0b1000, 0b0011, 0b000, 0>; +defm : TLBI<"VMALLE1IS", 0, 0b000, 0b1000, 0b0011, 0b000, 0, 1>; +defm : TLBI<"ALLE2IS", 0, 0b100, 0b1000, 0b0011, 0b000, 0, 1>; +defm : TLBI<"ALLE3IS", 0, 0b110, 0b1000, 0b0011, 0b000, 0, 1>; defm : TLBI<"VAE1IS", 1, 0b000, 0b1000, 0b0011, 0b001>; defm : TLBI<"VAE2IS", 1, 0b100, 0b1000, 0b0011, 0b001>; defm : TLBI<"VAE3IS", 1, 
0b110, 0b1000, 0b0011, 0b001>; defm : TLBI<"ASIDE1IS", 0, 0b000, 0b1000, 0b0011, 0b010>; defm : TLBI<"VAAE1IS", 1, 0b000, 0b1000, 0b0011, 0b011>; -defm : TLBI<"ALLE1IS", 0, 0b100, 0b1000, 0b0011, 0b100, 0>; +defm : TLBI<"ALLE1IS", 0, 0b100, 0b1000, 0b0011, 0b100, 0, 1>; defm : TLBI<"VALE1IS", 1, 0b000, 0b1000, 0b0011, 0b101>; defm : TLBI<"VALE2IS", 1, 0b100, 0b1000, 0b0011, 0b101>; defm : TLBI<"VALE3IS", 1, 0b110, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VMALLS12E1IS", 0, 0b100, 0b1000, 0b0011, 0b110, 0>; +defm : TLBI<"VMALLS12E1IS", 0, 0b100, 0b1000, 0b0011, 0b110, 0, 1>; defm : TLBI<"VAALE1IS", 1, 0b000, 0b1000, 0b0011, 0b111>; defm : TLBI<"IPAS2E1", 1, 0b100, 0b1000, 0b0100, 0b001>; defm : TLBI<"IPAS2LE1", 1, 0b100, 0b1000, 0b0100, 0b101>; -defm : TLBI<"VMALLE1", 0, 0b000, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"ALLE2", 0, 0b100, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"ALLE3", 0, 0b110, 0b1000, 0b0111, 0b000, 0>; +defm : TLBI<"VMALLE1", 0, 0b000, 0b1000, 0b0111, 0b000, 0, 0>; +defm : TLBI<"ALLE2", 0, 0b100, 0b1000, 0b0111, 0b000, 0, 0>; +defm : TLBI<"ALLE3", 0, 0b110, 0b1000, 0b0111, 0b000, 0, 0>; defm : TLBI<"VAE1", 1, 0b000, 0b1000, 0b0111, 0b001>; defm : TLBI<"VAE2", 1, 0b100, 0b1000, 0b0111, 0b001>; defm : TLBI<"VAE3", 1, 0b110, 0b1000, 0b0111, 0b001>; defm : TLBI<"ASIDE1", 0, 0b000, 0b1000, 0b0111, 0b010>; defm : TLBI<"VAAE1", 1, 0b000, 0b1000, 0b0111, 0b011>; -defm : TLBI<"ALLE1", 0, 0b100, 0b1000, 0b0111, 0b100, 0>; +defm : TLBI<"ALLE1", 0, 0b100, 0b1000, 0b0111, 0b100, 0, 0>; defm : TLBI<"VALE1", 1, 0b000, 0b1000, 0b0111, 0b101>; defm : TLBI<"VALE2", 1, 0b100, 0b1000, 0b0111, 0b101>; defm : TLBI<"VALE3", 1, 0b110, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VMALLS12E1", 0, 0b100, 0b1000, 0b0111, 0b110, 0>; +defm : TLBI<"VMALLS12E1", 0, 0b100, 0b1000, 0b0111, 0b110, 0, 0>; defm : TLBI<"VAALE1", 1, 0b000, 0b1000, 0b0111, 0b111>; // Armv8.4-A Translation Lookaside Buffer Instructions (TLBI) let Requires = ["AArch64::FeatureTLB_RMI"] in { // Armv8.4-A Outer 
Sharable TLB Maintenance instructions: -// hasTLBIP op1 CRn CRm op2 needsreg -defm : TLBI<"VMALLE1OS", 0, 0b000, 0b1000, 0b0001, 0b000, 0>; +// hasTLBIP op1 CRn CRm op2 needsreg, optreg +defm : TLBI<"VMALLE1OS", 0, 0b000, 0b1000, 0b0001, 0b000, 0, 1>; defm : TLBI<"VAE1OS", 1, 0b000, 0b1000, 0b0001, 0b001>; defm : TLBI<"ASIDE1OS", 0, 0b000, 0b1000, 0b0001, 0b010>; defm : TLBI<"VAAE1OS", 1, 0b000, 0b1000, 0b0001, 0b011>; @@ -919,15 +941,15 @@ defm : TLBI<"IPAS2E1OS", 1, 0b100, 0b1000, 0b0100, 0b000>; defm : TLBI<"IPAS2LE1OS", 1, 0b100, 0b1000, 0b0100, 0b100>; defm : TLBI<"VAE2OS", 1, 0b100, 0b1000, 0b0001, 0b001>; defm : TLBI<"VALE2OS", 1, 0b100, 0b1000, 0b0001, 0b101>; -defm : TLBI<"VMALLS12E1OS", 0, 0b100, 0b1000, 0b0001, 0b110, 0>; +defm : TLBI<"VMALLS12E1OS", 0, 0b100, 0b1000, 0b0001, 0b110, 0, 1>; defm : TLBI<"VAE3OS", 1, 0b110, 0b1000, 0b0001, 0b001>; defm : TLBI<"VALE3OS", 1, 0b110, 0b1000, 0b0001, 0b101>; -defm : TLBI<"ALLE2OS", 0, 0b100, 0b1000, 0b0001, 0b000, 0>; -defm : TLBI<"ALLE1OS", 0, 0b100, 0b1000, 0b0001, 0b100, 0>; -defm : TLBI<"ALLE3OS", 0, 0b110, 0b1000, 0b0001, 0b000, 0>; +defm : TLBI<"ALLE2OS", 0, 0b100, 0b1000, 0b0001, 0b000, 0, 1>; +defm : TLBI<"ALLE1OS", 0, 0b100, 0b1000, 0b0001, 0b100, 0, 1>; +defm : TLBI<"ALLE3OS", 0, 0b110, 0b1000, 0b0001, 0b000, 0, 1>; // Armv8.4-A TLB Range Maintenance instructions: -// hasTLBIP op1 CRn CRm op2 needsreg +// hasTLBIP op1 CRn CRm op2 defm : TLBI<"RVAE1", 1, 0b000, 0b1000, 0b0110, 0b001>; defm : TLBI<"RVAAE1", 1, 0b000, 0b1000, 0b0110, 0b011>; defm : TLBI<"RVALE1", 1, 0b000, 0b1000, 0b0110, 0b101>; @@ -962,18 +984,19 @@ defm : TLBI<"RVALE3OS", 1, 0b110, 0b1000, 0b0101, 0b101>; // Armv9-A Realm Management Extension TLBI Instructions let Requires = ["AArch64::FeatureRME"] in { +// hasTLBIP op1 CRn CRm op2 needsreg defm : TLBI<"RPAOS", 0, 0b110, 0b1000, 0b0100, 0b011>; defm : TLBI<"RPALOS", 0, 0b110, 0b1000, 0b0100, 0b111>; -defm : TLBI<"PAALLOS", 0, 0b110, 0b1000, 0b0001, 0b100, 0>; -defm : TLBI<"PAALL", 0, 
0b110, 0b1000, 0b0111, 0b100, 0>; +defm : TLBI<"PAALLOS", 0, 0b110, 0b1000, 0b0001, 0b100, 0, 0>; +defm : TLBI<"PAALL", 0, 0b110, 0b1000, 0b0111, 0b100, 0, 0>; } // Armv9.5-A TLBI VMALL for Dirty State let Requires = ["AArch64::FeatureTLBIW"] in { -// op1, CRn, CRm, op2, needsreg -defm : TLBI<"VMALLWS2E1", 0, 0b100, 0b1000, 0b0110, 0b010, 0>; -defm : TLBI<"VMALLWS2E1IS", 0, 0b100, 0b1000, 0b0010, 0b010, 0>; -defm : TLBI<"VMALLWS2E1OS", 0, 0b100, 0b1000, 0b0101, 0b010, 0>; +// hasTLBIP op1 CRn CRm op2 needsreg, optreg +defm : TLBI<"VMALLWS2E1", 0, 0b100, 0b1000, 0b0110, 0b010, 0, 0>; +defm : TLBI<"VMALLWS2E1IS", 0, 0b100, 0b1000, 0b0010, 0b010, 0, 1>; +defm : TLBI<"VMALLWS2E1OS", 0, 0b100, 0b1000, 0b0101, 0b010, 0, 1>; } //===----------------------------------------------------------------------===// @@ -1862,13 +1885,6 @@ def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>; // v8.4a MPAM registers // Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeatureMPAM} }] in { -def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>; -def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>; -def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>; -def : RWSysReg<"MPAM3_EL3", 0b11, 0b110, 0b1010, 0b0101, 0b000>; -def : RWSysReg<"MPAM1_EL12", 0b11, 0b101, 0b1010, 0b0101, 0b000>; -def : RWSysReg<"MPAMHCR_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b000>; def : RWSysReg<"MPAMVPMV_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b001>; def : RWSysReg<"MPAMVPM0_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b000>; def : RWSysReg<"MPAMVPM1_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b001>; @@ -1878,8 +1894,6 @@ def : RWSysReg<"MPAMVPM4_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b100>; def : RWSysReg<"MPAMVPM5_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b101>; def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>; def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>; -def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>; -} //FeatureMPAM // v8.4a Activity 
Monitor registers // Op0 Op1 CRn CRm Op2 @@ -2319,6 +2333,26 @@ def : RWSysReg<"MPAMBW0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b101>; def : RWSysReg<"MPAMBWCAP_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b110>; def : RWSysReg<"MPAMBWSM_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b111>; +// v9.7a Memory partitioning and monitoring version 2 +// (FEAT_MPAMv2) registers +// Op0 Op1 CRn CRm Op2 +// MPAM system registers that are also available for MPAMv2 +def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>; +def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>; +def : RWSysReg<"MPAM1_EL12", 0b11, 0b101, 0b1010, 0b0101, 0b000>; +def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>; +def : RWSysReg<"MPAM3_EL3", 0b11, 0b110, 0b1010, 0b0101, 0b000>; +def : RWSysReg<"MPAMHCR_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b000>; +def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>; +// Only MPAMv2 registers +def : RWSysReg<"MPAMCTL_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b010>; +def : RWSysReg<"MPAMCTL_EL12", 0b11, 0b101, 0b1010, 0b0101, 0b010>; +def : RWSysReg<"MPAMCTL_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b010>; +def : RWSysReg<"MPAMCTL_EL3", 0b11, 0b110, 0b1010, 0b0101, 0b010>; +def : RWSysReg<"MPAMVIDCR_EL2", 0b11, 0b100, 0b1010, 0b0111, 0b000>; +def : RWSysReg<"MPAMVIDSR_EL2", 0b11, 0b100, 0b1010, 0b0111, 0b001>; +def : RWSysReg<"MPAMVIDSR_EL3", 0b11, 0b110, 0b1010, 0b0111, 0b001>; + //===----------------------------------------------------------------------===// // FEAT_SRMASK v9.6a registers //===----------------------------------------------------------------------===// @@ -2412,3 +2446,251 @@ def : DC<"CIVAPS", 0b000, 0b0111, 0b1111, 0b001>; let Requires = [{ {AArch64::FeaturePoPS, AArch64::FeatureMTE} }] in { def : DC<"CIGDVAPS", 0b000, 0b0111, 0b1111, 0b101>; } + +// v9.7a TLBI domains system registers (MemSys) +foreach n = 0-3 in { + defvar nb = !cast<bits<3>>(n); + def : RWSysReg<"VTLBID"#n#"_EL2", 0b11, 0b100, 0b0010, 0b1000, nb>; +} + +foreach n = 
0-3 in { + defvar nb = !cast<bits<3>>(n); + def : RWSysReg<"VTLBIDOS"#n#"_EL2", 0b11, 0b100, 0b0010, 0b1001, nb>; +} + +def : ROSysReg<"TLBIDIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b110>; + +// MPAM Lookaside Buffer Invalidate (MLBI) instructions +class MLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2, bit needsreg> { + string Name = name; + bits<14> Encoding; + let Encoding{13-11} = op1; + let Encoding{10-7} = crn; + let Encoding{6-3} = crm; + let Encoding{2-0} = op2; + bit NeedsReg = needsreg; + string RequiresStr = [{ {AArch64::FeatureMPAMv2} }]; +} + +def MLBITable : GenericTable { + let FilterClass = "MLBI"; + let CppTypeName = "MLBI"; + let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"]; + + let PrimaryKey = ["Encoding"]; + let PrimaryKeyName = "lookupMLBIByEncoding"; +} + +def lookupMLBIByName : SearchIndex { + let Table = MLBITable; + let Key = ["Name"]; +} + +// Op1 CRn CRm Op2 needsReg +def : MLBI<"ALLE1", 0b100, 0b0111, 0b0000, 0b100, 0>; +def : MLBI<"VMALLE1", 0b100, 0b0111, 0b0000, 0b101, 0>; +def : MLBI<"VPIDE1", 0b100, 0b0111, 0b0000, 0b110, 1>; +def : MLBI<"VPMGE1", 0b100, 0b0111, 0b0000, 0b111, 1>; + + +// v9.7-A GICv5 (FEAT_GCIE) +// CPU Interface Registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"ICC_APR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b000>; +def : RWSysReg<"ICC_APR_EL3", 0b11, 0b110, 0b1100, 0b1000, 0b000>; +def : RWSysReg<"ICC_CR0_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b001>; +def : RWSysReg<"ICC_CR0_EL3", 0b11, 0b110, 0b1100, 0b1001, 0b000>; +def : ROSysReg<"ICC_DOMHPPIR_EL3", 0b11, 0b110, 0b1100, 0b1000, 0b010>; +def : ROSysReg<"ICC_HAPR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b011>; +def : ROSysReg<"ICC_HPPIR_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b011>; +def : ROSysReg<"ICC_HPPIR_EL3", 0b11, 0b110, 0b1100, 0b1001, 0b001>; +def : ROSysReg<"ICC_IAFFIDR_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b101>; +def : RWSysReg<"ICC_ICSR_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b100>; +def : ROSysReg<"ICC_IDR0_EL1", 0b11, 0b000, 0b1100, 
0b1010, 0b010>; +def : RWSysReg<"ICC_PCR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b010>; +def : RWSysReg<"ICC_PCR_EL3", 0b11, 0b110, 0b1100, 0b1000, 0b001>; + +// Virtual CPU Interface Registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"ICV_APR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b000>; +def : RWSysReg<"ICV_CR0_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b001>; +def : RWSysReg<"ICV_HAPR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b011>; +def : RWSysReg<"ICV_HPPIR_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b011>; +def : RWSysReg<"ICV_PCR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b010>; + +foreach n=0-3 in { + defvar nb = !cast<bits<2>>(n); +// Op0 Op1 CRn CRm Op2 + def : RWSysReg<"ICC_PPI_DOMAINR"#n#"_EL3", 0b11, 0b110, 0b1100, 0b1000, {0b1,nb{1-0}}>; + +} + +foreach n=0-15 in{ + defvar nb = !cast<bits<4>>(n); +// Op0 Op1 CRn CRm Op2 + def : RWSysReg<"ICC_PPI_PRIORITYR"#n#"_EL1", 0b11, 0b000, 0b1100, {0b111,nb{3}}, nb{2-0}>; +} + +// PPI and Virtual PPI Registers +multiclass PPIRegisters<string prefix> { + foreach n=0-1 in { + defvar nb = !cast<bit>(n); +// Op0 Op1 CRn CRm Op2 + def : RWSysReg<prefix#"_PPI_CACTIVER"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1101, {0b00,nb}>; + def : RWSysReg<prefix#"_PPI_CPENDR"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1101, {0b10,nb}>; + def : RWSysReg<prefix#"_PPI_ENABLER"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1010, {0b11,nb}>; + def : RWSysReg<prefix#"_PPI_SACTIVER"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1101, {0b01,nb}>; + def : RWSysReg<prefix#"_PPI_SPENDR"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1101, {0b11,nb}>; + def : RWSysReg<prefix#"_PPI_HMR"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1010, {0b00,nb}>; + } +} + +defm : PPIRegisters<"ICC">; // PPI Registers +defm : PPIRegisters<"ICV">; // Virtual PPI Registers + +foreach n=0-15 in { + defvar nb = !cast<bits<4>>(n); +// Op0 Op1 CRn CRm Op2 + def : RWSysReg<"ICV_PPI_PRIORITYR"#n#"_EL1", 0b11, 0b000, 0b1100, {0b111,nb{3}}, nb{2-0}>; +} + +// Hypervisor Control Registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"ICH_APR_EL2", 0b11, 0b100, 0b1100, 0b1000, 
0b100>; +def : RWSysReg<"ICH_CONTEXTR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b110>; +def : RWSysReg<"ICH_HFGITR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b111>; +def : RWSysReg<"ICH_HFGRTR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b100>; +def : RWSysReg<"ICH_HFGWTR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b110>; +def : ROSysReg<"ICH_HPPIR_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b101>; +def : RWSysReg<"ICH_VCTLR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b100>; + +foreach n=0-1 in { + defvar nb = !cast<bit>(n); +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"ICH_PPI_ACTIVER"#n#"_EL2", 0b11, 0b100, 0b1100, 0b1010, {0b11,nb}>; +def : RWSysReg<"ICH_PPI_DVIR"#n#"_EL2", 0b11, 0b100, 0b1100, 0b1010, {0b00,nb}>; +def : RWSysReg<"ICH_PPI_ENABLER"#n#"_EL2", 0b11, 0b100, 0b1100, 0b1010, {0b01,nb}>; +def : RWSysReg<"ICH_PPI_PENDR"#n#"_EL2", 0b11, 0b100, 0b1100, 0b1010, {0b10,nb}>; +} + +foreach n=0-15 in { + defvar nb = !cast<bits<4>>(n); +// Op0 Op1 CRn CRm Op2 + def : RWSysReg<"ICH_PPI_PRIORITYR"#n#"_EL2", 0b11, 0b100, 0b1100, {0b111,nb{3}}, nb{2-0}>; +} + +//===----------------------------------------------------------------------===// +// GICv5 instruction options. 
+//===----------------------------------------------------------------------===// + +// GIC +class GIC<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2> { + string Name = name; + bits<14> Encoding; + let Encoding{13-11} = op1; + let Encoding{10-7} = crn; + let Encoding{6-3} = crm; + let Encoding{2-0} = op2; + bit NeedsReg = 1; + string RequiresStr = [{ {AArch64::FeatureGCIE} }]; +} + +// GSB +class GSB<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2> { + string Name = name; + bits<14> Encoding; + let Encoding{13-11} = op1; + let Encoding{10-7} = crn; + let Encoding{6-3} = crm; + let Encoding{2-0} = op2; + string RequiresStr = [{ {AArch64::FeatureGCIE} }]; +} + +// GICR +class GICR<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2> { + string Name = name; + bits<14> Encoding; + let Encoding{13-11} = op1; + let Encoding{10-7} = crn; + let Encoding{6-3} = crm; + let Encoding{2-0} = op2; + bit NeedsReg = 1; + string RequiresStr = [{ {AArch64::FeatureGCIE} }]; +} + +def GICTable : GenericTable { + let FilterClass = "GIC"; + let CppTypeName = "GIC"; + let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"]; + + let PrimaryKey = ["Encoding"]; + let PrimaryKeyName = "lookupGICByEncoding"; +} + +def GSBTable : GenericTable { + let FilterClass = "GSB"; + let CppTypeName = "GSB"; + let Fields = ["Name", "Encoding", "RequiresStr"]; + + let PrimaryKey = ["Encoding"]; + let PrimaryKeyName = "lookupGSBByEncoding"; +} + +def GICRTable : GenericTable { + let FilterClass = "GICR"; + let CppTypeName = "GICR"; + let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"]; + + let PrimaryKey = ["Encoding"]; + let PrimaryKeyName = "lookupGICRByEncoding"; +} + +def lookupGICByName : SearchIndex { + let Table = GICTable; + let Key = ["Name"]; +} + +def lookupGSBByName : SearchIndex { + let Table = GSBTable; + let Key = ["Name"]; +} + +def lookupGICRByName : SearchIndex { + let Table = GICRTable; + let Key = ["Name"]; +} + +// Op1 CRn CRm 
Op2 +def : GSB<"sys", 0b000, 0b1100, 0b0000, 0b000>; +def : GSB<"ack", 0b000, 0b1100, 0b0000, 0b001>; + +// Op1 CRn CRm Op2 +def : GICR<"cdia", 0b000, 0b1100, 0b0011, 0b000>; +def : GICR<"cdnmia", 0b000, 0b1100, 0b0011, 0b001>; + +// Op1 CRn CRm Op2 +def : GIC<"cdaff", 0b000, 0b1100, 0b0001, 0b011>; +def : GIC<"cddi", 0b000, 0b1100, 0b0010, 0b000>; +def : GIC<"cddis", 0b000, 0b1100, 0b0001, 0b000>; +def : GIC<"cden", 0b000, 0b1100, 0b0001, 0b001>; +def : GIC<"cdeoi", 0b000, 0b1100, 0b0001, 0b111>; +def : GIC<"cdhm", 0b000, 0b1100, 0b0010, 0b001>; +def : GIC<"cdpend", 0b000, 0b1100, 0b0001, 0b100>; +def : GIC<"cdpri", 0b000, 0b1100, 0b0001, 0b010>; +def : GIC<"cdrcfg", 0b000, 0b1100, 0b0001, 0b101>; +def : GIC<"vdaff", 0b100, 0b1100, 0b0001, 0b011>; +def : GIC<"vddi", 0b100, 0b1100, 0b0010, 0b000>; +def : GIC<"vddis", 0b100, 0b1100, 0b0001, 0b000>; +def : GIC<"vden", 0b100, 0b1100, 0b0001, 0b001>; +def : GIC<"vdhm", 0b100, 0b1100, 0b0010, 0b001>; +def : GIC<"vdpend", 0b100, 0b1100, 0b0001, 0b100>; +def : GIC<"vdpri", 0b100, 0b1100, 0b0001, 0b010>; +def : GIC<"vdrcfg", 0b100, 0b1100, 0b0001, 0b101>; +def : GIC<"ldaff", 0b110, 0b1100, 0b0001, 0b011>; +def : GIC<"lddi", 0b110, 0b1100, 0b0010, 0b000>; +def : GIC<"lddis", 0b110, 0b1100, 0b0001, 0b000>; +def : GIC<"lden", 0b110, 0b1100, 0b0001, 0b001>; +def : GIC<"ldhm", 0b110, 0b1100, 0b0010, 0b001>; +def : GIC<"ldpend", 0b110, 0b1100, 0b0001, 0b100>; +def : GIC<"ldpri", 0b110, 0b1100, 0b0001, 0b010>; +def : GIC<"ldrcfg", 0b110, 0b1100, 0b0001, 0b101>; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index e3370d3..fede586 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -224,7 +224,8 @@ static cl::opt<bool> EnableScalableAutovecInStreamingMode( static bool isSMEABIRoutineCall(const CallInst &CI, const AArch64TargetLowering &TLI) { const auto *F = 
CI.getCalledFunction(); - return F && SMEAttrs(F->getName(), TLI).isSMEABIRoutine(); + return F && + SMEAttrs(F->getName(), TLI.getRuntimeLibcallsInfo()).isSMEABIRoutine(); } /// Returns true if the function has explicit operations that can only be @@ -355,7 +356,7 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call, // change only once and avoid inlining of G into F. SMEAttrs FAttrs(*F); - SMECallAttrs CallAttrs(Call, getTLI()); + SMECallAttrs CallAttrs(Call, &getTLI()->getRuntimeLibcallsInfo()); if (SMECallAttrs(FAttrs, CallAttrs.callee()).requiresSMChange()) { if (F == Call.getCaller()) // (1) @@ -957,23 +958,50 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return TyL.first + ExtraCost; } case Intrinsic::get_active_lane_mask: { - auto *RetTy = dyn_cast<FixedVectorType>(ICA.getReturnType()); - if (RetTy) { - EVT RetVT = getTLI()->getValueType(DL, RetTy); - EVT OpVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]); - if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) && - !getTLI()->isTypeLegal(RetVT)) { - // We don't have enough context at this point to determine if the mask - // is going to be kept live after the block, which will force the vXi1 - // type to be expanded to legal vectors of integers, e.g. v4i1->v4i32. - // For now, we just assume the vectorizer created this intrinsic and - // the result will be the input for a PHI. In this case the cost will - // be extremely high for fixed-width vectors. - // NOTE: getScalarizationOverhead returns a cost that's far too - // pessimistic for the actual generated codegen. In reality there are - // two instructions generated per lane. 
- return RetTy->getNumElements() * 2; + auto RetTy = cast<VectorType>(ICA.getReturnType()); + EVT RetVT = getTLI()->getValueType(DL, RetTy); + EVT OpVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]); + if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT)) + break; + + if (RetTy->isScalableTy()) { + if (TLI->getTypeAction(RetTy->getContext(), RetVT) != + TargetLowering::TypeSplitVector) + break; + + auto LT = getTypeLegalizationCost(RetTy); + InstructionCost Cost = LT.first; + // When SVE2p1 or SME2 is available, we can halve getTypeLegalizationCost + // as get_active_lane_mask may lower to the sve_whilelo_x2 intrinsic, e.g. + // nxv32i1 = get_active_lane_mask(base, idx) -> + // {nxv16i1, nxv16i1} = sve_whilelo_x2(base, idx) + if (ST->hasSVE2p1() || ST->hasSME2()) { + Cost /= 2; + if (Cost == 1) + return Cost; } + + // If more than one whilelo intrinsic is required, include the extra cost + // required by the saturating add & select required to increment the + // start value after the first intrinsic call. + Type *OpTy = ICA.getArgTypes()[0]; + IntrinsicCostAttributes AddAttrs(Intrinsic::uadd_sat, OpTy, {OpTy, OpTy}); + InstructionCost SplitCost = getIntrinsicInstrCost(AddAttrs, CostKind); + Type *CondTy = OpTy->getWithNewBitWidth(1); + SplitCost += getCmpSelInstrCost(Instruction::Select, OpTy, CondTy, + CmpInst::ICMP_UGT, CostKind); + return Cost + (SplitCost * (Cost - 1)); + } else if (!getTLI()->isTypeLegal(RetVT)) { + // We don't have enough context at this point to determine if the mask + // is going to be kept live after the block, which will force the vXi1 + // type to be expanded to legal vectors of integers, e.g. v4i1->v4i32. + // For now, we just assume the vectorizer created this intrinsic and + // the result will be the input for a PHI. In this case the cost will + // be extremely high for fixed-width vectors. + // NOTE: getScalarizationOverhead returns a cost that's far too + // pessimistic for the actual generated codegen. 
In reality there are + // two instructions generated per lane. + return cast<FixedVectorType>(RetTy)->getNumElements() * 2; } break; } @@ -1577,18 +1605,26 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) { } static bool isAllActivePredicate(Value *Pred) { - // Look through convert.from.svbool(convert.to.svbool(...) chain. Value *UncastedPred; + + // Look through predicate casts that only remove lanes. if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>( - m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>( - m_Value(UncastedPred))))) - // If the predicate has the same or less lanes than the uncasted - // predicate then we know the casting has no effect. - if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <= - cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements()) - Pred = UncastedPred; + m_Value(UncastedPred)))) { + auto *OrigPredTy = cast<ScalableVectorType>(Pred->getType()); + Pred = UncastedPred; + + if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>( + m_Value(UncastedPred)))) + // If the predicate has the same or less lanes than the uncasted predicate + // then we know the casting has no effect. + if (OrigPredTy->getMinNumElements() <= + cast<ScalableVectorType>(UncastedPred->getType()) + ->getMinNumElements()) + Pred = UncastedPred; + } + auto *C = dyn_cast<Constant>(Pred); - return (C && C->isAllOnesValue()); + return C && C->isAllOnesValue(); } // Simplify `V` by only considering the operations that affect active lanes. 
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 636d4f8a..6273cfc 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -159,6 +159,7 @@ private: SMLoc getLoc() const { return getParser().getTok().getLoc(); } bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands); + bool parseSyslAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands); bool parseSyspAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands); void createSysAlias(uint16_t Encoding, OperandVector &Operands, SMLoc S); AArch64CC::CondCode parseCondCodeString(StringRef Cond, @@ -266,6 +267,7 @@ private: ParseStatus tryParseRPRFMOperand(OperandVector &Operands); ParseStatus tryParsePSBHint(OperandVector &Operands); ParseStatus tryParseBTIHint(OperandVector &Operands); + ParseStatus tryParseCMHPriorityHint(OperandVector &Operands); ParseStatus tryParseAdrpLabel(OperandVector &Operands); ParseStatus tryParseAdrLabel(OperandVector &Operands); template <bool AddFPZeroAsLiteral> @@ -370,6 +372,7 @@ private: k_PSBHint, k_PHint, k_BTIHint, + k_CMHPriorityHint, } Kind; SMLoc StartLoc, EndLoc; @@ -499,6 +502,11 @@ private: unsigned Length; unsigned Val; }; + struct CMHPriorityHintOp { + const char *Data; + unsigned Length; + unsigned Val; + }; struct SVCROp { const char *Data; @@ -525,6 +533,7 @@ private: struct PSBHintOp PSBHint; struct PHintOp PHint; struct BTIHintOp BTIHint; + struct CMHPriorityHintOp CMHPriorityHint; struct ShiftExtendOp ShiftExtend; struct SVCROp SVCR; }; @@ -595,6 +604,9 @@ public: case k_BTIHint: BTIHint = o.BTIHint; break; + case k_CMHPriorityHint: + CMHPriorityHint = o.CMHPriorityHint; + break; case k_ShiftExtend: ShiftExtend = o.ShiftExtend; break; @@ -769,6 +781,16 @@ public: return StringRef(BTIHint.Data, BTIHint.Length); } + unsigned getCMHPriorityHint() const { + assert(Kind == k_CMHPriorityHint 
&& "Invalid access!"); + return CMHPriorityHint.Val; + } + + StringRef getCMHPriorityHintName() const { + assert(Kind == k_CMHPriorityHint && "Invalid access!"); + return StringRef(CMHPriorityHint.Data, CMHPriorityHint.Length); + } + StringRef getSVCR() const { assert(Kind == k_SVCR && "Invalid access!"); return StringRef(SVCR.Data, SVCR.Length); @@ -1511,6 +1533,7 @@ public: bool isPSBHint() const { return Kind == k_PSBHint; } bool isPHint() const { return Kind == k_PHint; } bool isBTIHint() const { return Kind == k_BTIHint; } + bool isCMHPriorityHint() const { return Kind == k_CMHPriorityHint; } bool isShiftExtend() const { return Kind == k_ShiftExtend; } bool isShifter() const { if (!isShiftExtend()) @@ -2196,6 +2219,11 @@ public: Inst.addOperand(MCOperand::createImm(getBTIHint())); } + void addCMHPriorityHintOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(getCMHPriorityHint())); + } + void addShifterOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); unsigned Imm = @@ -2547,6 +2575,17 @@ public: } static std::unique_ptr<AArch64Operand> + CreateCMHPriorityHint(unsigned Val, StringRef Str, SMLoc S, MCContext &Ctx) { + auto Op = std::make_unique<AArch64Operand>(k_CMHPriorityHint, Ctx); + Op->CMHPriorityHint.Val = Val; + Op->CMHPriorityHint.Data = Str.data(); + Op->CMHPriorityHint.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static std::unique_ptr<AArch64Operand> CreateMatrixRegister(unsigned RegNum, unsigned ElementWidth, MatrixKind Kind, SMLoc S, SMLoc E, MCContext &Ctx) { auto Op = std::make_unique<AArch64Operand>(k_MatrixRegister, Ctx); @@ -2656,6 +2695,9 @@ void AArch64Operand::print(raw_ostream &OS, const MCAsmInfo &MAI) const { case k_BTIHint: OS << getBTIHintName(); break; + case k_CMHPriorityHint: + OS << getCMHPriorityHintName(); + break; case k_MatrixRegister: OS << "<matrix " << getMatrixReg() << 
">"; break; @@ -3279,6 +3321,24 @@ ParseStatus AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) { return ParseStatus::Success; } +/// tryParseCMHPriorityHint - Try to parse a CMHPriority operand +ParseStatus AArch64AsmParser::tryParseCMHPriorityHint(OperandVector &Operands) { + SMLoc S = getLoc(); + const AsmToken &Tok = getTok(); + if (Tok.isNot(AsmToken::Identifier)) + return TokError("invalid operand for instruction"); + + auto CMHPriority = + AArch64CMHPriorityHint::lookupCMHPriorityHintByName(Tok.getString()); + if (!CMHPriority) + return TokError("invalid operand for instruction"); + + Operands.push_back(AArch64Operand::CreateCMHPriorityHint( + CMHPriority->Encoding, Tok.getString(), S, getContext())); + Lex(); // Eat identifier token. + return ParseStatus::Success; +} + /// tryParseAdrpLabel - Parse and validate a source label for the ADRP /// instruction. ParseStatus AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { @@ -3824,6 +3884,18 @@ static const struct Extension { {"ssve-bitperm", {AArch64::FeatureSSVE_BitPerm}}, {"sme-mop4", {AArch64::FeatureSME_MOP4}}, {"sme-tmop", {AArch64::FeatureSME_TMOP}}, + {"cmh", {AArch64::FeatureCMH}}, + {"lscp", {AArch64::FeatureLSCP}}, + {"tlbid", {AArch64::FeatureTLBID}}, + {"mpamv2", {AArch64::FeatureMPAMv2}}, + {"mtetc", {AArch64::FeatureMTETC}}, + {"gcie", {AArch64::FeatureGCIE}}, + {"sme2p3", {AArch64::FeatureSME2p3}}, + {"sve2p3", {AArch64::FeatureSVE2p3}}, + {"sve-b16mm", {AArch64::FeatureSVE_B16MM}}, + {"f16mm", {AArch64::FeatureF16MM}}, + {"f16f32dot", {AArch64::FeatureF16F32DOT}}, + {"f16f32mm", {AArch64::FeatureF16F32MM}}, }; static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { @@ -3861,6 +3933,8 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { Str += "ARMv9.5a"; else if (FBS[AArch64::HasV9_6aOps]) Str += "ARMv9.6a"; + else if (FBS[AArch64::HasV9_7aOps]) + Str += "ARMv9.7a"; else if (FBS[AArch64::HasV8_0rOps]) Str += "ARMv8r"; else { @@ 
-3894,8 +3968,9 @@ void AArch64AsmParser::createSysAlias(uint16_t Encoding, OperandVector &Operands AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); } -/// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for -/// the SYS instruction. Parse them specially so that we create a SYS MCInst. +/// parseSysAlias - The IC, DC, AT, TLBI, MLBI and GIC{R} and GSB instructions +/// are simple aliases for the SYS instruction. Parse them specially so that +/// we create a SYS MCInst. bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands) { if (Name.contains('.')) @@ -3908,6 +3983,8 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, StringRef Op = Tok.getString(); SMLoc S = Tok.getLoc(); bool ExpectRegister = true; + bool OptionalRegister = false; + bool hasAll = getSTI().hasFeature(AArch64::FeatureAll); if (Mnemonic == "ic") { const AArch64IC::IC *IC = AArch64IC::lookupICByName(Op); @@ -3950,13 +4027,50 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, return TokError(Str); } ExpectRegister = TLBI->NeedsReg; + bool hasTLBID = getSTI().hasFeature(AArch64::FeatureTLBID); + if (hasAll || hasTLBID) { + OptionalRegister = TLBI->OptionalReg; + } createSysAlias(TLBI->Encoding, Operands, S); - } else if (Mnemonic == "cfp" || Mnemonic == "dvp" || Mnemonic == "cpp" || Mnemonic == "cosp") { + } else if (Mnemonic == "mlbi") { + const AArch64MLBI::MLBI *MLBI = AArch64MLBI::lookupMLBIByName(Op); + if (!MLBI) + return TokError("invalid operand for MLBI instruction"); + else if (!MLBI->haveFeatures(getSTI().getFeatureBits())) { + std::string Str("MLBI " + std::string(MLBI->Name) + " requires: "); + setRequiredFeatureString(MLBI->getRequiredFeatures(), Str); + return TokError(Str); + } + ExpectRegister = MLBI->NeedsReg; + createSysAlias(MLBI->Encoding, Operands, S); + } else if (Mnemonic == "gic") { + const AArch64GIC::GIC *GIC = AArch64GIC::lookupGICByName(Op); + if (!GIC) + return 
TokError("invalid operand for GIC instruction"); + else if (!GIC->haveFeatures(getSTI().getFeatureBits())) { + std::string Str("GIC " + std::string(GIC->Name) + " requires: "); + setRequiredFeatureString(GIC->getRequiredFeatures(), Str); + return TokError(Str); + } + ExpectRegister = true; + createSysAlias(GIC->Encoding, Operands, S); + } else if (Mnemonic == "gsb") { + const AArch64GSB::GSB *GSB = AArch64GSB::lookupGSBByName(Op); + if (!GSB) + return TokError("invalid operand for GSB instruction"); + else if (!GSB->haveFeatures(getSTI().getFeatureBits())) { + std::string Str("GSB " + std::string(GSB->Name) + " requires: "); + setRequiredFeatureString(GSB->getRequiredFeatures(), Str); + return TokError(Str); + } + ExpectRegister = false; + createSysAlias(GSB->Encoding, Operands, S); + } else if (Mnemonic == "cfp" || Mnemonic == "dvp" || Mnemonic == "cpp" || + Mnemonic == "cosp") { if (Op.lower() != "rctx") return TokError("invalid operand for prediction restriction instruction"); - bool hasAll = getSTI().hasFeature(AArch64::FeatureAll); bool hasPredres = hasAll || getSTI().hasFeature(AArch64::FeaturePredRes); bool hasSpecres2 = hasAll || getSTI().hasFeature(AArch64::FeatureSPECRES2); @@ -3989,10 +4103,61 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, HasRegister = true; } - if (ExpectRegister && !HasRegister) - return TokError("specified " + Mnemonic + " op requires a register"); - else if (!ExpectRegister && HasRegister) - return TokError("specified " + Mnemonic + " op does not use a register"); + if (!OptionalRegister) { + if (ExpectRegister && !HasRegister) + return TokError("specified " + Mnemonic + " op requires a register"); + else if (!ExpectRegister && HasRegister) + return TokError("specified " + Mnemonic + " op does not use a register"); + } + + if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list")) + return true; + + return false; +} + +/// parseSyslAlias - The GICR instructions are simple aliases for +/// 
the SYSL instruction. Parse them specially so that we create a +/// SYSL MCInst. +bool AArch64AsmParser::parseSyslAlias(StringRef Name, SMLoc NameLoc, + OperandVector &Operands) { + + Mnemonic = Name; + Operands.push_back( + AArch64Operand::CreateToken("sysl", NameLoc, getContext())); + + // Now expect two operands (identifier + register) + SMLoc startLoc = getLoc(); + const AsmToken &regTok = getTok(); + StringRef reg = regTok.getString(); + unsigned RegNum = matchRegisterNameAlias(reg.lower(), RegKind::Scalar); + if (!RegNum) + return TokError("expected register operand"); + + Operands.push_back(AArch64Operand::CreateReg( + RegNum, RegKind::Scalar, startLoc, getLoc(), getContext(), EqualsReg)); + + Lex(); // Eat token + if (parseToken(AsmToken::Comma)) + return true; + + // Check for identifier + const AsmToken &operandTok = getTok(); + StringRef Op = operandTok.getString(); + SMLoc S2 = operandTok.getLoc(); + Lex(); // Eat token + + if (Mnemonic == "gicr") { + const AArch64GICR::GICR *GICR = AArch64GICR::lookupGICRByName(Op); + if (!GICR) + return Error(S2, "invalid operand for GICR instruction"); + else if (!GICR->haveFeatures(getSTI().getFeatureBits())) { + std::string Str("GICR " + std::string(GICR->Name) + " requires: "); + setRequiredFeatureString(GICR->getRequiredFeatures(), Str); + return Error(S2, Str); + } + createSysAlias(GICR->Encoding, Operands, S2); + } if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list")) return true; @@ -4025,7 +4190,7 @@ bool AArch64AsmParser::parseSyspAlias(StringRef Name, SMLoc NameLoc, return TokError("invalid operand for TLBIP instruction"); const AArch64TLBIP::TLBIP TLBIP( TLBIPorig->Name, TLBIPorig->Encoding | (HasnXSQualifier ? (1 << 7) : 0), - TLBIPorig->NeedsReg, + TLBIPorig->NeedsReg, TLBIPorig->OptionalReg, HasnXSQualifier ? 
TLBIPorig->FeaturesRequired | FeatureBitset({AArch64::FeatureXS}) : TLBIPorig->FeaturesRequired); @@ -4719,6 +4884,13 @@ ParseStatus AArch64AsmParser::tryParseVectorList(OperandVector &Operands, FirstReg, Count, Stride, NumElements, ElementWidth, VectorKind, S, getLoc(), getContext())); + if (getTok().is(AsmToken::LBrac)) { + ParseStatus Res = tryParseVectorIndex(Operands); + if (Res.isFailure()) + return ParseStatus::Failure; + return ParseStatus::Success; + } + return ParseStatus::Success; } @@ -5267,12 +5439,17 @@ bool AArch64AsmParser::parseInstruction(ParseInstructionInfo &Info, size_t Start = 0, Next = Name.find('.'); StringRef Head = Name.slice(Start, Next); - // IC, DC, AT, TLBI and Prediction invalidation instructions are aliases for - // the SYS instruction. + // IC, DC, AT, TLBI, MLBI, GIC{R}, GSB and Prediction invalidation + // instructions are aliases for the SYS instruction. if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi" || - Head == "cfp" || Head == "dvp" || Head == "cpp" || Head == "cosp") + Head == "cfp" || Head == "dvp" || Head == "cpp" || Head == "cosp" || + Head == "mlbi" || Head == "gic" || Head == "gsb") return parseSysAlias(Head, NameLoc, Operands); + // GICR instructions are aliases for the SYSL instruction. + if (Head == "gicr") + return parseSyslAlias(Head, NameLoc, Operands); + // TLBIP instructions are aliases for the SYSP instruction. 
if (Head == "tlbip") return parseSyspAlias(Head, NameLoc, Operands); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 3e55b76..14b0f9a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -5126,23 +5126,13 @@ bool AArch64InstructionSelector::selectShuffleVector( MachineInstr &I, MachineRegisterInfo &MRI) { const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); Register Src1Reg = I.getOperand(1).getReg(); - const LLT Src1Ty = MRI.getType(Src1Reg); Register Src2Reg = I.getOperand(2).getReg(); - const LLT Src2Ty = MRI.getType(Src2Reg); ArrayRef<int> Mask = I.getOperand(3).getShuffleMask(); MachineBasicBlock &MBB = *I.getParent(); MachineFunction &MF = *MBB.getParent(); LLVMContext &Ctx = MF.getFunction().getContext(); - // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if - // it's originated from a <1 x T> type. Those should have been lowered into - // G_BUILD_VECTOR earlier. - if (!Src1Ty.isVector() || !Src2Ty.isVector()) { - LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n"); - return false; - } - unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; SmallVector<Constant *, 64> CstIdxs; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 05a4313..5f93847 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1201,25 +1201,17 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) return llvm::is_contained( {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy); }) - // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar - // destinations, we just want those lowered into G_BUILD_VECTOR or - // G_EXTRACT_ELEMENT. 
- .lowerIf([=](const LegalityQuery &Query) { - return !Query.Types[0].isVector() || !Query.Types[1].isVector(); - }) .moreElementsIf( [](const LegalityQuery &Query) { - return Query.Types[0].isVector() && Query.Types[1].isVector() && - Query.Types[0].getNumElements() > - Query.Types[1].getNumElements(); + return Query.Types[0].getNumElements() > + Query.Types[1].getNumElements(); }, changeTo(1, 0)) .moreElementsToNextPow2(0) .moreElementsIf( [](const LegalityQuery &Query) { - return Query.Types[0].isVector() && Query.Types[1].isVector() && - Query.Types[0].getNumElements() < - Query.Types[1].getNumElements(); + return Query.Types[0].getNumElements() < + Query.Types[1].getNumElements(); }, changeTo(0, 1)) .widenScalarOrEltToNextPow2OrMinSize(0, 8) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 830a35bb..6d2d705 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -856,7 +856,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case TargetOpcode::G_FPTOSI_SAT: - case TargetOpcode::G_FPTOUI_SAT: { + case TargetOpcode::G_FPTOUI_SAT: + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_FPTOUI: { LLT DstType = MRI.getType(MI.getOperand(0).getReg()); if (DstType.isVector()) break; @@ -864,11 +866,19 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; break; } - OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR}; + TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); + TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI); + if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) && + all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), + [&](const MachineInstr &UseMI) { + return onlyUsesFP(UseMI, MRI, TRI) || + prefersFPUse(UseMI, MRI, TRI); + })) + 
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; + else + OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR}; break; } - case TargetOpcode::G_FPTOSI: - case TargetOpcode::G_FPTOUI: case TargetOpcode::G_INTRINSIC_LRINT: case TargetOpcode::G_INTRINSIC_LLRINT: if (MRI.getType(MI.getOperand(0).getReg()).isVector()) diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 35bd244..5c3e26e 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -84,6 +84,12 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address, return; } + if (Opcode == AArch64::SYSLxt) + if (printSyslAlias(MI, STI, O)) { + printAnnotation(O, Annot); + return; + } + if (Opcode == AArch64::SYSPxt || Opcode == AArch64::SYSPxt_XZR) if (printSyspAlias(MI, STI, O)) { printAnnotation(O, Annot); @@ -909,13 +915,25 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, Encoding |= CnVal << 7; Encoding |= Op1Val << 11; - bool NeedsReg; + bool NeedsReg = false; + bool OptionalReg = false; std::string Ins; std::string Name; if (CnVal == 7) { switch (CmVal) { default: return false; + // MLBI aliases + case 0: { + const AArch64MLBI::MLBI *MLBI = + AArch64MLBI::lookupMLBIByEncoding(Encoding); + if (!MLBI || !MLBI->haveFeatures(STI.getFeatureBits())) + return false; + + NeedsReg = MLBI->NeedsReg; + Ins = "mlbi\t"; + Name = std::string(MLBI->Name); + } break; // Maybe IC, maybe Prediction Restriction case 1: switch (Op1Val) { @@ -1004,19 +1022,41 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, return false; NeedsReg = TLBI->NeedsReg; + if (STI.hasFeature(AArch64::FeatureAll) || + STI.hasFeature(AArch64::FeatureTLBID)) + OptionalReg = TLBI->OptionalReg; Ins = "tlbi\t"; Name = std::string(TLBI->Name); - } - else + } else if (CnVal == 12) { + if (CmVal != 0) { + // GIC aliases + const AArch64GIC::GIC *GIC = 
AArch64GIC::lookupGICByEncoding(Encoding); + if (!GIC || !GIC->haveFeatures(STI.getFeatureBits())) + return false; + + NeedsReg = true; + Ins = "gic\t"; + Name = std::string(GIC->Name); + } else { + // GSB aliases + const AArch64GSB::GSB *GSB = AArch64GSB::lookupGSBByEncoding(Encoding); + if (!GSB || !GSB->haveFeatures(STI.getFeatureBits())) + return false; + + NeedsReg = false; + Ins = "gsb\t"; + Name = std::string(GSB->Name); + } + } else return false; StringRef Reg = getRegisterName(MI->getOperand(4).getReg()); bool NotXZR = Reg != "xzr"; - // If a mandatory is not specified in the TableGen + // If a mandatory or optional register is not specified in the TableGen // (i.e. no register operand should be present), and the register value // is not xzr/x31, then disassemble to a SYS alias instead. - if (NotXZR && !NeedsReg) + if (NotXZR && !NeedsReg && !OptionalReg) return false; std::string Str = Ins + Name; @@ -1024,12 +1064,64 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, O << '\t' << Str; - if (NeedsReg) + // For optional registers, don't print the value if it's xzr/x31 + // since this defaults to xzr/x31 if register is not specified. 
+ if (NeedsReg || (OptionalReg && NotXZR)) O << ", " << Reg; return true; } +bool AArch64InstPrinter::printSyslAlias(const MCInst *MI, + const MCSubtargetInfo &STI, + raw_ostream &O) { +#ifndef NDEBUG + unsigned Opcode = MI->getOpcode(); + assert(Opcode == AArch64::SYSLxt && "Invalid opcode for SYSL alias!"); +#endif + + StringRef Reg = getRegisterName(MI->getOperand(0).getReg()); + const MCOperand &Op1 = MI->getOperand(1); + const MCOperand &Cn = MI->getOperand(2); + const MCOperand &Cm = MI->getOperand(3); + const MCOperand &Op2 = MI->getOperand(4); + + unsigned Op1Val = Op1.getImm(); + unsigned CnVal = Cn.getImm(); + unsigned CmVal = Cm.getImm(); + unsigned Op2Val = Op2.getImm(); + + uint16_t Encoding = Op2Val; + Encoding |= CmVal << 3; + Encoding |= CnVal << 7; + Encoding |= Op1Val << 11; + + std::string Ins; + std::string Name; + + if (CnVal == 12) { + if (CmVal == 3) { + // GICR aliases + const AArch64GICR::GICR *GICR = + AArch64GICR::lookupGICRByEncoding(Encoding); + if (!GICR || !GICR->haveFeatures(STI.getFeatureBits())) + return false; + + Ins = "gicr"; + Name = std::string(GICR->Name); + } else + return false; + } else + return false; + + std::string Str; + llvm::transform(Name, Name.begin(), ::tolower); + + O << '\t' << Ins << '\t' << Reg.str() << ", " << Name; + + return true; +} + bool AArch64InstPrinter::printSyspAlias(const MCInst *MI, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -1508,6 +1600,17 @@ void AArch64InstPrinter::printBTIHintOp(const MCInst *MI, unsigned OpNum, markup(O, Markup::Immediate) << '#' << formatImm(btihintop); } +void AArch64InstPrinter::printCMHPriorityHintOp(const MCInst *MI, + unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned priorityhint_op = MI->getOperand(OpNum).getImm(); + auto PHint = + AArch64CMHPriorityHint::lookupCMHPriorityHintByEncoding(priorityhint_op); + if (PHint) + O << PHint->Name; +} + void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, const 
MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h index 15ef2dd..307402d 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h @@ -52,6 +52,8 @@ public: protected: bool printSysAlias(const MCInst *MI, const MCSubtargetInfo &STI, raw_ostream &O); + bool printSyslAlias(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); bool printSyspAlias(const MCInst *MI, const MCSubtargetInfo &STI, raw_ostream &O); bool printRangePrefetchAlias(const MCInst *MI, const MCSubtargetInfo &STI, @@ -151,6 +153,9 @@ protected: void printBTIHintOp(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + void printCMHPriorityHintOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printFPImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 33f35ad..99836ae 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -3920,6 +3920,78 @@ multiclass sme2_luti4_vector_vg4_index<string mnemonic> { def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>; } +// 8-bit Look up table +class sme2_lut_single<string asm> + : I<(outs ZPR8:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn), + asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> { + bits<0> ZTt; + bits<5> Zd; + bits<5> Zn; + let Inst{31-10} = 0b1100000011001000010000; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +//===----------------------------------------------------------------------===// +// Lookup table read with 6-bit indices (8-bit) +class sme2_luti6_zt_base<RegisterOperand zd_ty, string asm> + : I<(outs zd_ty:$Zd), (ins ZTR:$ZTt, ZZZ_Any:$Zn), + asm, "\t$Zd, $ZTt, $Zn", "", []>, 
Sched<[]> { + bits<0> ZTt; + bits<3> Zd; + bits<3> Zn; + let Inst{31-21} = 0b11000000100; + let Inst{19-10} = 0b1010000000; + let Inst{9-7} = Zn; + let Inst{6-5} = 0b00; +} + +class sme2_luti6_zt_consecutive<string asm> + : sme2_luti6_zt_base<ZZZZ_b_mul_r, asm> { + let Inst{20} = 0; + let Inst{4-2} = Zd; + let Inst{1-0} = 0b00; +} + +class sme2_luti6_zt_strided<string asm> + : sme2_luti6_zt_base<ZZZZ_b_strided, asm> { + let Inst{20} = 1; + let Inst{4} = Zd{2}; + let Inst{3-2} = 0b00; + let Inst{1-0} = Zd{1-0}; +} + +//===----------------------------------------------------------------------===// +// Lookup table read with 6-bit indices (8-bit) +class sme2_luti6_vector_vg4_base<RegisterOperand zd_ty, string asm> + : I<(outs zd_ty:$Zd), (ins ZZ_h:$Zn, ZZ_Any:$Zm, VectorIndexD:$i1), + asm, "\t$Zd, $Zn, $Zm$i1", "", []>, Sched<[]> { + bits<3> Zd; + bits<5> Zn; + bits<5> Zm; + bits<1> i1; + let Inst{31-23} = 0b110000010; + let Inst{22} = i1; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{9-5} = Zn; +} + +class sme2_luti6_vector_vg4_consecutive<string asm> + : sme2_luti6_vector_vg4_base<ZZZZ_h_mul_r, asm> { + let Inst{15-10} = 0b111101; + let Inst{4-2} = Zd; + let Inst{1-0} = 0b00; +} + +class sme2_luti6_vector_vg4_strided<string asm> + : sme2_luti6_vector_vg4_base<ZZZZ_h_strided, asm> { + let Inst{15-10} = 0b111111; + let Inst{4} = Zd{2}; + let Inst{3-2} = 0b00; + let Inst{1-0} = Zd{1-0}; +} + //===----------------------------------------------------------------------===// // SME2 MOV class sme2_mova_vec_to_tile_vg2_multi_base<bits<2> sz, bit v, diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 3cdd505..1664f4a 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -3787,7 +3787,7 @@ multiclass sve2p1_two_way_dot_vv<string mnemonic, bit u, SDPatternOperator intri // SVE Integer Dot Product Group - Indexed Group 
//===----------------------------------------------------------------------===// -class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm, +class sve_intx_dot_by_indexed_elem<bit U, string asm, ZPRRegOp zprty1, ZPRRegOp zprty2, ZPRRegOp zprty3, Operand itype> : I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop), @@ -3795,8 +3795,7 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm, "", []>, Sched<[]> { bits<5> Zda; bits<5> Zn; - let Inst{31-23} = 0b010001001; - let Inst{22} = sz; + let Inst{31-24} = 0b01000100; let Inst{21} = 0b1; let Inst{15-11} = 0; let Inst{10} = U; @@ -3810,16 +3809,18 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm, multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm, SDPatternOperator op> { - def _BtoS : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> { + def _BtoS : sve_intx_dot_by_indexed_elem<opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> { bits<2> iop; bits<3> Zm; + let Inst{23-22} = 0b10; let Inst{20-19} = iop; let Inst{18-16} = Zm; } - def _HtoD : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> { + def _HtoD : sve_intx_dot_by_indexed_elem<opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> { bits<1> iop; bits<4> Zm; - let Inst{20} = iop; + let Inst{23-22} = 0b11; + let Inst{20} = iop; let Inst{19-16} = Zm; } @@ -3827,6 +3828,16 @@ multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm, def : SVE_4_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _HtoD)>; } +class sve_intx_dot_by_indexed_elem_x<bit opc, string asm> +: sve_intx_dot_by_indexed_elem<opc, asm, ZPR16, ZPR8, ZPR3b8, VectorIndexH32b_timm> { + bits<3> iop; + bits<3> Zm; + let Inst{23} = 0b0; + let Inst{22} = iop{2}; + let Inst{20-19} = iop{1-0}; + let Inst{18-16} = Zm; +} + //===----------------------------------------------------------------------===// // SVE2 
Complex Integer Dot Product Group //===----------------------------------------------------------------------===// @@ -4085,7 +4096,7 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm, bits<5> Zdn; let Inst{31-24} = 0b01000100; let Inst{23-22} = sz; - let Inst{21-20} = 0b01; + let Inst{21} = 0b0; let Inst{20-16} = opc{5-1}; let Inst{15-14} = 0b10; let Inst{13} = opc{0}; @@ -4590,15 +4601,15 @@ multiclass sve2_int_cadd<bit opc, string asm, SDPatternOperator op> { def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, complexrotateopodd, !cast<Instruction>(NAME # _D)>; } -class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm, +class sve2_int_absdiff_accum<bits<3> sz, bits<4> opc, string asm, ZPRRegOp zprty1, ZPRRegOp zprty2> : I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm), asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zda; bits<5> Zn; bits<5> Zm; - let Inst{31-24} = 0b01000101; - let Inst{23-22} = sz; + let Inst{31-25} = 0b0100010; + let Inst{24-22} = sz; let Inst{21} = 0b0; let Inst{20-16} = Zm; let Inst{15-14} = 0b11; @@ -4613,10 +4624,10 @@ class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm, } multiclass sve2_int_absdiff_accum<bit opc, string asm, SDPatternOperator op> { - def _B : sve2_int_absdiff_accum<0b00, { 0b111, opc }, asm, ZPR8, ZPR8>; - def _H : sve2_int_absdiff_accum<0b01, { 0b111, opc }, asm, ZPR16, ZPR16>; - def _S : sve2_int_absdiff_accum<0b10, { 0b111, opc }, asm, ZPR32, ZPR32>; - def _D : sve2_int_absdiff_accum<0b11, { 0b111, opc }, asm, ZPR64, ZPR64>; + def _B : sve2_int_absdiff_accum<0b100, { 0b111, opc }, asm, ZPR8, ZPR8>; + def _H : sve2_int_absdiff_accum<0b101, { 0b111, opc }, asm, ZPR16, ZPR16>; + def _S : sve2_int_absdiff_accum<0b110, { 0b111, opc }, asm, ZPR32, ZPR32>; + def _D : sve2_int_absdiff_accum<0b111, { 0b111, opc }, asm, ZPR64, ZPR64>; def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>; def : SVE_3_Op_Pat<nxv8i16, op, 
nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; @@ -4626,20 +4637,26 @@ multiclass sve2_int_absdiff_accum<bit opc, string asm, SDPatternOperator op> { multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm, SDPatternOperator op> { - def _H : sve2_int_absdiff_accum<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>; - def _S : sve2_int_absdiff_accum<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>; - def _D : sve2_int_absdiff_accum<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>; + def _H : sve2_int_absdiff_accum<0b101, { 0b00, opc }, asm, ZPR16, ZPR8>; + def _S : sve2_int_absdiff_accum<0b110, { 0b00, opc }, asm, ZPR32, ZPR16>; + def _D : sve2_int_absdiff_accum<0b111, { 0b00, opc }, asm, ZPR64, ZPR32>; def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _H)>; def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _S)>; def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>; } +multiclass sve2_int_two_way_absdiff_accum_long<bit U, string asm> { + def _BtoH : sve2_int_absdiff_accum<0b001, { 0b01, U, 0b1 }, asm, ZPR16, ZPR8>; + def _HtoS : sve2_int_absdiff_accum<0b010, { 0b01, U, 0b1 }, asm, ZPR32, ZPR16>; + def _StoD : sve2_int_absdiff_accum<0b011, { 0b01, U, 0b1 }, asm, ZPR64, ZPR32>; +} + multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm, SDPatternOperator op> { - def _S : sve2_int_absdiff_accum<{ opc{1}, 0b0 }, { 0b010, opc{0} }, asm, + def _S : sve2_int_absdiff_accum<{ 0b1, opc{1}, 0b0 }, { 0b010, opc{0} }, asm, ZPR32, ZPR32>; - def _D : sve2_int_absdiff_accum<{ opc{1}, 0b1 }, { 0b010, opc{0} }, asm, + def _D : sve2_int_absdiff_accum<{ 0b1, opc{1}, 0b1 }, { 0b010, opc{0} }, asm, ZPR64, ZPR64>; def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>; @@ -9610,17 +9627,18 @@ multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> { // SVE Floating Point Matrix Multiply Accumulate Group 
//===----------------------------------------------------------------------===// -class sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty> +class sve_fp_matrix_mla<bits<3> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty> : I<(outs zda_ty:$Zda), (ins zda_ty:$_Zda, reg_ty:$Zn, reg_ty:$Zm), asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zda; bits<5> Zn; bits<5> Zm; let Inst{31-24} = 0b01100100; - let Inst{23-22} = opc; + let Inst{23-22} = opc{2-1}; let Inst{21} = 1; let Inst{20-16} = Zm; - let Inst{15-10} = 0b111001; + let Inst{15-11} = 0b11100; + let Inst{10} = opc{0}; let Inst{9-5} = Zn; let Inst{4-0} = Zda; @@ -9630,10 +9648,12 @@ class sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_t let mayRaiseFPException = 1; } -multiclass sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty, SDPatternOperator op, ValueType zda_vt, ValueType reg_vt> { +multiclass sve_fp_matrix_mla<bits<3> opc, string asm, ZPRRegOp zda_ty, + ZPRRegOp reg_ty, SDPatternOperator op, + ValueType zda_vt, ValueType reg_vt> { def NAME : sve_fp_matrix_mla<opc, asm, zda_ty, reg_ty>; - def : SVE_3_Op_Pat<zda_vt, op , zda_vt, reg_vt, reg_vt, !cast<Instruction>(NAME)>; + def : SVE_3_Op_Pat<zda_vt, op, zda_vt, reg_vt, reg_vt, !cast<Instruction>(NAME)>; } //===----------------------------------------------------------------------===// @@ -10030,18 +10050,19 @@ multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatte } // SVE2 multi-vec shift narrow -class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz> - : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4), - mnemonic, "\t$Zd, $Zn, $imm4", +class sve2p1_multi_vec_shift_narrow<string mnemonic, ZPRRegOp ZdRC, RegisterOperand ZSrcOp, + Operand immtype, bits<3> opc, bits<2> tsz> + : I<(outs ZdRC:$Zd), (ins ZSrcOp:$Zn, immtype:$imm), + mnemonic, "\t$Zd, $Zn, $imm", "", []>, Sched<[]> { bits<5> Zd; bits<4> Zn; - bits<4> imm4; 
+ bits<4> imm; let Inst{31-23} = 0b010001011; let Inst{22} = tsz{1}; let Inst{21} = 0b1; let Inst{20} = tsz{0}; - let Inst{19-16} = imm4; + let Inst{18-16} = imm{2-0}; // imm3 let Inst{15-14} = 0b00; let Inst{13-11} = opc; let Inst{10} = 0b0; @@ -10052,12 +10073,19 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz> let hasSideEffects = 0; } -multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> { - def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>; +multiclass sve_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> { + def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, ZPR16, ZZ_s_mul_r, vecshiftR16, opc, 0b01> { + let Inst{19} = imm{3}; // imm4 + } def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, vecshiftR16>; } +multiclass sve_multi_vec_round_shift_narrow<string mnemonic, bits<3> opc> { + def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, ZPR8, ZZ_h_mul_r, vecshiftR8, opc, 0b00> { + let Inst{19} = 0b1; // always 1 for imm3 version + } +} // SME2 multi-vec contiguous load (scalar plus scalar, two registers) class sve2p1_mem_cld_ss_2z<string mnemonic, bits<2> msz, bit n, @@ -11164,7 +11192,7 @@ multiclass sve2_fp8_dot_indexed_s<string asm, SDPatternOperator op> { def : SVE_4_Op_Pat<nxv4f32, op, nxv4f32, nxv16i8, nxv16i8, i32, !cast<Instruction>(NAME)>; } -// FP8 Look up table +// Look up table class sve2_lut_vector_index<ZPRRegOp zd_ty, RegisterOperand zn_ty, Operand idx_ty, bits<4>opc, string mnemonic> : I<(outs zd_ty:$Zd), (ins zn_ty:$Zn, ZPRAny:$Zm, idx_ty:$idx), @@ -11183,7 +11211,7 @@ class sve2_lut_vector_index<ZPRRegOp zd_ty, RegisterOperand zn_ty, let Inst{4-0} = Zd; } -// FP8 Look up table read with 2-bit indices +// Look up table read with 2-bit indices multiclass sve2_luti2_vector_index<string mnemonic> { def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexS32b, {?, 0b100}, mnemonic> { bits<2> idx; @@ -11205,7 +11233,7 
@@ multiclass sve2_luti2_vector_index<string mnemonic> { i32, timm32_0_7, !cast<Instruction>(NAME # _H)>; } -// FP8 Look up table read with 4-bit indices +// Look up table read with 4-bit indices multiclass sve2_luti4_vector_index<string mnemonic> { def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexD32b, 0b1001, mnemonic> { bit idx; @@ -11226,7 +11254,7 @@ multiclass sve2_luti4_vector_index<string mnemonic> { i32, timm32_0_3, !cast<Instruction>(NAME # _H)>; } -// FP8 Look up table read with 4-bit indices (two contiguous registers) +// Look up table read with 4-bit indices (two contiguous registers) multiclass sve2_luti4_vector_vg2_index<string mnemonic> { def NAME : sve2_lut_vector_index<ZPR16, ZZ_h, VectorIndexS32b, {?, 0b101}, mnemonic> { bits<2> idx; @@ -11250,6 +11278,29 @@ multiclass sve2_luti4_vector_vg2_index<string mnemonic> { nxv16i8:$Op3, timm32_0_3:$Op4))>; } +// Look up table read with 6-bit indices +multiclass sve2_luti6_vector_index<string mnemonic> { + def _H : sve2_lut_vector_index<ZPR16, ZZ_h, VectorIndexD32b, 0b1011, mnemonic> { + bit idx; + let Inst{23} = idx; + } +} + +// Look up table +class sve2_luti6_vector<string mnemonic> + : I<(outs ZPR8:$Zd), (ins ZZ_b:$Zn, ZPRAny:$Zm), + mnemonic, "\t$Zd, $Zn, $Zm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> Zm; + let Inst{31-21} = 0b01000101001; + let Inst{20-16} = Zm; + let Inst{15-10} = 0b101011; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + //===----------------------------------------------------------------------===// // Checked Pointer Arithmetic (FEAT_CPA) //===----------------------------------------------------------------------===// @@ -11280,3 +11331,49 @@ class sve_int_mla_cpa<string asm> let ElementSize = ZPR64.ElementSize; } + +//===----------------------------------------------------------------------===// +// FP to Int down-converts +//===----------------------------------------------------------------------===// +class sve2_fp_to_int_downcvt<string asm, ZPRRegOp 
ZdRC, RegisterOperand ZSrcOp, bits<2> size, bit U> + : I<(outs ZdRC:$Zd), (ins ZSrcOp:$Zn), + asm, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<5> Zd; + bits<4> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = size; + let Inst{21-11} = 0b00110100110; + let Inst{10} = U; + let Inst{9-6} = Zn; + let Inst{5} = 0b0; + let Inst{4-0} = Zd; +} + +multiclass sve2_fp_to_int_downcvt<string asm, bit U> { + def _HtoB : sve2_fp_to_int_downcvt<asm, ZPR8, ZZ_h_mul_r, 0b01, U>; + def _StoH : sve2_fp_to_int_downcvt<asm, ZPR16, ZZ_s_mul_r, 0b10, U>; + def _DtoS : sve2_fp_to_int_downcvt<asm, ZPR32, ZZ_d_mul_r, 0b11, U>; +} + +//===----------------------------------------------------------------------===// +// Int to FP up-converts +//===----------------------------------------------------------------------===// +class sve2_int_to_fp_upcvt<string asm, ZPRRegOp ZdRC, ZPRRegOp ZnRC, + bits<2> size, bits<2> U> + : I<(outs ZdRC:$Zd), (ins ZnRC:$Zn), + asm, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = size; + let Inst{21-12} = 0b0011000011; + let Inst{11-10} = U; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U> { + def _BtoH : sve2_int_to_fp_upcvt<asm, ZPR16, ZPR8, 0b01, U>; + def _HtoS : sve2_int_to_fp_upcvt<asm, ZPR32, ZPR16, 0b10, U>; + def _StoD : sve2_int_to_fp_upcvt<asm, ZPR64, ZPR32, 0b11, U>; +} diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index d6cb0e8..268a229 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -139,6 +139,13 @@ namespace llvm { } namespace llvm { +namespace AArch64CMHPriorityHint { +#define GET_CMHPRIORITYHINT_IMPL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64CMHPriorityHint +} // namespace llvm + +namespace llvm { namespace AArch64SysReg { #define GET_SysRegsList_IMPL #include 
"AArch64GenSystemOperands.inc" @@ -190,6 +197,32 @@ namespace AArch64TLBIP { #define GET_TLBIPTable_IMPL #include "AArch64GenSystemOperands.inc" } // namespace AArch64TLBIP + +namespace AArch64MLBI { +#define GET_MLBITable_IMPL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64MLBI +} // namespace llvm + +namespace llvm { +namespace AArch64GIC { +#define GET_GICTable_IMPL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64GIC +} // namespace llvm + +namespace llvm { +namespace AArch64GICR { +#define GET_GICRTable_IMPL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64GICR +} // namespace llvm + +namespace llvm { +namespace AArch64GSB { +#define GET_GSBTable_IMPL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64GSB } // namespace llvm namespace llvm { diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index fea33ef..27812e9 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -409,6 +409,16 @@ struct SysAliasReg : SysAlias { : SysAlias(N, E, F), NeedsReg(R) {} }; +struct SysAliasOptionalReg : SysAlias { + bool NeedsReg; + bool OptionalReg; + constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O) + : SysAlias(N, E), NeedsReg(R), OptionalReg(O) {} + constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O, + FeatureBitset F) + : SysAlias(N, E, F), NeedsReg(R), OptionalReg(O) {} +}; + struct SysAliasImm : SysAlias { uint16_t ImmValue; constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I) @@ -677,6 +687,14 @@ namespace AArch64BTIHint { #include "AArch64GenSystemOperands.inc" } +namespace AArch64CMHPriorityHint { +struct CMHPriorityHint : SysAlias { + using SysAlias::SysAlias; +}; +#define GET_CMHPRIORITYHINT_DECL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64CMHPriorityHint + namespace AArch64SME { enum ToggleCondition : unsigned { 
Always, @@ -788,21 +806,53 @@ namespace AArch64SysReg { } namespace AArch64TLBI { - struct TLBI : SysAliasReg { - using SysAliasReg::SysAliasReg; - }; - #define GET_TLBITable_DECL - #include "AArch64GenSystemOperands.inc" +struct TLBI : SysAliasOptionalReg { + using SysAliasOptionalReg::SysAliasOptionalReg; +}; +#define GET_TLBITable_DECL +#include "AArch64GenSystemOperands.inc" } namespace AArch64TLBIP { -struct TLBIP : SysAliasReg { - using SysAliasReg::SysAliasReg; +struct TLBIP : SysAliasOptionalReg { + using SysAliasOptionalReg::SysAliasOptionalReg; }; #define GET_TLBIPTable_DECL #include "AArch64GenSystemOperands.inc" } // namespace AArch64TLBIP +namespace AArch64MLBI { +struct MLBI : SysAliasReg { + using SysAliasReg::SysAliasReg; +}; +#define GET_MLBITable_DECL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64MLBI + +namespace AArch64GIC { +struct GIC : SysAliasReg { + using SysAliasReg::SysAliasReg; +}; +#define GET_GICTable_DECL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64GIC + +namespace AArch64GICR { +struct GICR : SysAliasReg { + using SysAliasReg::SysAliasReg; +}; +#define GET_GICRTable_DECL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64GICR + +namespace AArch64GSB { +struct GSB : SysAlias { + using SysAlias::SysAlias; +}; +#define GET_GSBTable_DECL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64GSB + namespace AArch64II { /// Target Operand Flag enum. 
enum TOF { diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp index d71f728..085c8588 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp @@ -75,8 +75,8 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) { } void SMEAttrs::addKnownFunctionAttrs(StringRef FuncName, - const AArch64TargetLowering &TLI) { - RTLIB::LibcallImpl Impl = TLI.getSupportedLibcallImpl(FuncName); + const RTLIB::RuntimeLibcallsInfo &RTLCI) { + RTLIB::LibcallImpl Impl = RTLCI.getSupportedLibcallImpl(FuncName); if (Impl == RTLIB::Unsupported) return; unsigned KnownAttrs = SMEAttrs::Normal; @@ -124,21 +124,22 @@ bool SMECallAttrs::requiresSMChange() const { return true; } -SMECallAttrs::SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI) +SMECallAttrs::SMECallAttrs(const CallBase &CB, + const RTLIB::RuntimeLibcallsInfo *RTLCI) : CallerFn(*CB.getFunction()), CalledFn(SMEAttrs::Normal), Callsite(CB.getAttributes()), IsIndirect(CB.isIndirectCall()) { if (auto *CalledFunction = CB.getCalledFunction()) - CalledFn = SMEAttrs(*CalledFunction, TLI); - - // An `invoke` of an agnostic ZA function may not return normally (it may - // resume in an exception block). In this case, it acts like a private ZA - // callee and may require a ZA save to be set up before it is called. - if (isa<InvokeInst>(CB)) - CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false); + CalledFn = SMEAttrs(*CalledFunction, RTLCI); // FIXME: We probably should not allow SME attributes on direct calls but // clang duplicates streaming mode attributes at each callsite. assert((IsIndirect || ((Callsite.withoutPerCallsiteFlags() | CalledFn) == CalledFn)) && "SME attributes at callsite do not match declaration"); + + // An `invoke` of an agnostic ZA function may not return normally (it may + // resume in an exception block). 
In this case, it acts like a private ZA + // callee and may require a ZA save to be set up before it is called. + if (isa<InvokeInst>(CB)) + CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false); } diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h index d26e3cd..28c397e 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h @@ -12,8 +12,9 @@ #include "llvm/IR/Function.h" namespace llvm { - -class AArch64TargetLowering; +namespace RTLIB { +struct RuntimeLibcallsInfo; +} class Function; class CallBase; @@ -52,14 +53,14 @@ public: SMEAttrs() = default; SMEAttrs(unsigned Mask) { set(Mask); } - SMEAttrs(const Function &F, const AArch64TargetLowering *TLI = nullptr) + SMEAttrs(const Function &F, const RTLIB::RuntimeLibcallsInfo *RTLCI = nullptr) : SMEAttrs(F.getAttributes()) { - if (TLI) - addKnownFunctionAttrs(F.getName(), *TLI); + if (RTLCI) + addKnownFunctionAttrs(F.getName(), *RTLCI); } SMEAttrs(const AttributeList &L); - SMEAttrs(StringRef FuncName, const AArch64TargetLowering &TLI) { - addKnownFunctionAttrs(FuncName, TLI); + SMEAttrs(StringRef FuncName, const RTLIB::RuntimeLibcallsInfo &RTLCI) { + addKnownFunctionAttrs(FuncName, RTLCI); }; void set(unsigned M, bool Enable = true) { @@ -157,7 +158,7 @@ public: private: void addKnownFunctionAttrs(StringRef FuncName, - const AArch64TargetLowering &TLI); + const RTLIB::RuntimeLibcallsInfo &RTLCI); void validate() const; }; @@ -175,7 +176,7 @@ public: SMEAttrs Callsite = SMEAttrs::Normal) : CallerFn(Caller), CalledFn(Callee), Callsite(Callsite) {} - SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI); + SMECallAttrs(const CallBase &CB, const RTLIB::RuntimeLibcallsInfo *RTLCI); SMEAttrs &caller() { return CallerFn; } SMEAttrs &callee() { return IsIndirect ? 
Callsite : CalledFn; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index ea32748..1c8383c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1430,6 +1430,18 @@ def FeatureAddSubU64Insts def FeatureMadU32Inst : SubtargetFeature<"mad-u32-inst", "HasMadU32Inst", "true", "Has v_mad_u32 instruction">; +def FeatureAddMinMaxInsts : SubtargetFeature<"add-min-max-insts", + "HasAddMinMaxInsts", + "true", + "Has v_add_{min|max}_{i|u}32 instructions" +>; + +def FeaturePkAddMinMaxInsts : SubtargetFeature<"pk-add-min-max-insts", + "HasPkAddMinMaxInsts", + "true", + "Has v_pk_add_{min|max}_{i|u}16 instructions" +>; + def FeatureMemToLDSLoad : SubtargetFeature<"vmem-to-lds-load-insts", "HasVMemToLDSLoad", "true", @@ -2115,6 +2127,8 @@ def FeatureISAVersion12_50 : FeatureSet< FeatureLshlAddU64Inst, FeatureAddSubU64Insts, FeatureMadU32Inst, + FeatureAddMinMaxInsts, + FeaturePkAddMinMaxInsts, FeatureLdsBarrierArriveAtomic, FeatureSetPrioIncWgInst, Feature45BitNumRecordsBufferResource, @@ -2658,11 +2672,11 @@ def HasFmaakFmamkF64Insts : def HasAddMinMaxInsts : Predicate<"Subtarget->hasAddMinMaxInsts()">, - AssemblerPredicate<(any_of FeatureGFX1250Insts)>; + AssemblerPredicate<(any_of FeatureAddMinMaxInsts)>; def HasPkAddMinMaxInsts : Predicate<"Subtarget->hasPkAddMinMaxInsts()">, - AssemblerPredicate<(any_of FeatureGFX1250Insts)>; + AssemblerPredicate<(any_of FeaturePkAddMinMaxInsts)>; def HasPkMinMax3Insts : Predicate<"Subtarget->hasPkMinMax3Insts()">, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td index e8b211f..7f00ead 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -176,6 +176,19 @@ def binop_s64_with_s32_mask_combines : GICombineGroup<[ combine_or_s64_with_s32_mask, combine_and_s64_with_s32_mask ]>; +// (or i64:x, (zext i32:y)) -> i64:(merge (or lo_32(x), i32:y), hi_32(x)) +// (or (zext i32:y), i64:x) -> 
i64:(merge (or lo_32(x), i32:y), hi_32(x)) +def or_s64_zext_s32_frag : GICombinePatFrag<(outs root:$dst), (ins $src_s64, $src_s32), + [(pattern (G_OR $dst, i64:$src_s64, i64:$zext_val), (G_ZEXT i64:$zext_val, i32:$src_s32)), + (pattern (G_OR $dst, i64:$zext_val, i64:$src_s64), (G_ZEXT i64:$zext_val, i32:$src_s32))]>; + +def combine_or_s64_s32 : GICombineRule< + (defs root:$dst), + (match (or_s64_zext_s32_frag $dst, i64:$x, i32:$y):$dst), + (apply (G_UNMERGE_VALUES $x_lo, $x_hi, $x), + (G_OR $or, $x_lo, $y), + (G_MERGE_VALUES $dst, $or, $x_hi))>; + let Predicates = [Has16BitInsts, NotHasMed3_16] in { // For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This // saves one instruction compared to the promotion. @@ -206,7 +219,7 @@ def AMDGPUPreLegalizerCombiner: GICombiner< "AMDGPUPreLegalizerCombinerImpl", [all_combines, combine_fmul_with_select_to_fldexp, clamp_i64_to_i16, foldable_fneg, combine_shuffle_vector_to_build_vector, - binop_s64_with_s32_mask_combines]> { + binop_s64_with_s32_mask_combines, combine_or_s64_s32]> { let CombineAllMethodName = "tryCombineAllImpl"; } @@ -215,7 +228,7 @@ def AMDGPUPostLegalizerCombiner: GICombiner< [all_combines, gfx6gfx7_combines, gfx8_combines, combine_fmul_with_select_to_fldexp, uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg, rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64, - binop_s64_with_s32_mask_combines]> { + binop_s64_with_s32_mask_combines, combine_or_s64_s32]> { let CombineAllMethodName = "tryCombineAllImpl"; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 8ed4062..1b559a6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -514,8 +514,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, MVT::i64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); - setOperationAction({ISD::ABS, ISD::SMIN, ISD::UMIN, 
ISD::SMAX, ISD::UMAX}, - MVT::i32, Legal); + setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32, + Legal); setOperationAction( {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 596a895..1a13b22 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -976,9 +976,25 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, FPOpActions.clampMaxNumElementsStrict(0, S32, 2); } + auto &MinNumMaxNumIeee = + getActionDefinitionsBuilder({G_FMINNUM_IEEE, G_FMAXNUM_IEEE}); + + if (ST.hasVOP3PInsts()) { + MinNumMaxNumIeee.legalFor(FPTypesPK16) + .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) + .clampMaxNumElements(0, S16, 2) + .clampScalar(0, S16, S64) + .scalarize(0); + } else if (ST.has16BitInsts()) { + MinNumMaxNumIeee.legalFor(FPTypes16).clampScalar(0, S16, S64).scalarize(0); + } else { + MinNumMaxNumIeee.legalFor(FPTypesBase) + .clampScalar(0, S32, S64) + .scalarize(0); + } + auto &MinNumMaxNum = getActionDefinitionsBuilder( - {G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM, G_FMINNUM_IEEE, - G_FMAXNUM_IEEE}); + {G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM}); if (ST.hasVOP3PInsts()) { MinNumMaxNum.customFor(FPTypesPK16) @@ -2136,9 +2152,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalFor(FPTypesPK16) .clampMaxNumElements(0, S16, 2) .scalarize(0); + } else if (ST.hasVOP3PInsts()) { + getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}) + .lowerFor({V2S16}) + .clampMaxNumElementsStrict(0, S16, 2) + .scalarize(0) + .lower(); } else { - // TODO: Implement - getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower(); + getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}) + .scalarize(0) + .clampScalar(0, S32, S64) + .lower(); } getActionDefinitionsBuilder({G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE, 
G_MEMSET}) @@ -2195,8 +2219,6 @@ bool AMDGPULegalizerInfo::legalizeCustom( case TargetOpcode::G_FMAXNUM: case TargetOpcode::G_FMINIMUMNUM: case TargetOpcode::G_FMAXIMUMNUM: - case TargetOpcode::G_FMINNUM_IEEE: - case TargetOpcode::G_FMAXNUM_IEEE: return legalizeMinNumMaxNum(Helper, MI); case TargetOpcode::G_EXTRACT_VECTOR_ELT: return legalizeExtractVectorElt(MI, MRI, B); @@ -2817,23 +2839,8 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper, MachineFunction &MF = Helper.MIRBuilder.getMF(); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE || - MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE; - - // With ieee_mode disabled, the instructions have the correct behavior - // already for G_FMINIMUMNUM/G_FMAXIMUMNUM. - // - // FIXME: G_FMINNUM/G_FMAXNUM should match the behavior with ieee_mode - // enabled. - if (!MFI->getMode().IEEE) { - if (MI.getOpcode() == AMDGPU::G_FMINIMUMNUM || - MI.getOpcode() == AMDGPU::G_FMAXIMUMNUM) - return true; - - return !IsIEEEOp; - } - - if (IsIEEEOp) + // With ieee_mode disabled, the instructions have the correct behavior. 
+ if (!MFI->getMode().IEEE) return true; return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 56807a4..54ba2f8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4835,6 +4835,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_perm_pk16_b4_u4: case Intrinsic::amdgcn_perm_pk16_b6_u4: case Intrinsic::amdgcn_perm_pk16_b8_u4: + case Intrinsic::amdgcn_add_max_i32: + case Intrinsic::amdgcn_add_max_u32: + case Intrinsic::amdgcn_add_min_i32: + case Intrinsic::amdgcn_add_min_u32: + case Intrinsic::amdgcn_pk_add_max_i16: + case Intrinsic::amdgcn_pk_add_max_u16: + case Intrinsic::amdgcn_pk_add_min_i16: + case Intrinsic::amdgcn_pk_add_min_u16: return getDefaultMappingVOP(MI); case Intrinsic::amdgcn_log: case Intrinsic::amdgcn_exp2: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 996b55f..02c5390 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -2086,7 +2086,7 @@ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const { (AMDGPUAtomicOptimizerStrategy != ScanOptions::None)) addPass(AMDGPUAtomicOptimizerPass(TM, AMDGPUAtomicOptimizerStrategy)); - addPass(AtomicExpandPass(&TM)); + addPass(AtomicExpandPass(TM)); if (TM.getOptLevel() > CodeGenOptLevel::None) { addPass(AMDGPUPromoteAllocaPass(TM)); diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 99ba043..5580e4c 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1860,7 +1860,6 @@ private: bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands, const unsigned 
CPol); bool validateTFE(const MCInst &Inst, const OperandVector &Operands); - bool validateSetVgprMSB(const MCInst &Inst, const OperandVector &Operands); bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands); bool validateWMMA(const MCInst &Inst, const OperandVector &Operands); unsigned getConstantBusLimit(unsigned Opcode) const; @@ -5506,22 +5505,6 @@ bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, return true; } -bool AMDGPUAsmParser::validateSetVgprMSB(const MCInst &Inst, - const OperandVector &Operands) { - if (Inst.getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12) - return true; - - int Simm16Pos = - AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::simm16); - if ((unsigned)Inst.getOperand(Simm16Pos).getImm() > 255) { - SMLoc Loc = Operands[1]->getStartLoc(); - Error(Loc, "s_set_vgpr_msb accepts values in range [0..255]"); - return false; - } - - return true; -} - bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst, const OperandVector &Operands) { unsigned Opc = Inst.getOpcode(); @@ -5681,9 +5664,6 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc, if (!validateTFE(Inst, Operands)) { return false; } - if (!validateSetVgprMSB(Inst, Operands)) { - return false; - } if (!validateWMMA(Inst, Operands)) { return false; } diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index a466780..ac660d5 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -277,6 +277,8 @@ protected: bool HasLshlAddU64Inst = false; bool HasAddSubU64Insts = false; bool HasMadU32Inst = false; + bool HasAddMinMaxInsts = false; + bool HasPkAddMinMaxInsts = false; bool HasPointSampleAccel = false; bool HasLdsBarrierArriveAtomic = false; bool HasSetPrioIncWgInst = false; @@ -1567,10 +1569,10 @@ public: bool hasIntMinMax64() const { return GFX1250Insts; } // \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions. 
- bool hasAddMinMaxInsts() const { return GFX1250Insts; } + bool hasAddMinMaxInsts() const { return HasAddMinMaxInsts; } // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions. - bool hasPkAddMinMaxInsts() const { return GFX1250Insts; } + bool hasPkAddMinMaxInsts() const { return HasPkAddMinMaxInsts; } // \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions. bool hasPkMinMax3Insts() const { return GFX1250Insts; } diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 09ef6ac..2aa54c9 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -45,9 +45,6 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, // Legalize loads and stores to the private address space. setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom); - // 32-bit ABS is legal for AMDGPU except for R600 - setOperationAction(ISD::ABS, MVT::i32, Expand); - // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address // spaces, so it is custom lowered to handle those where it isn't. 
for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index a757421..be42291 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -298,7 +298,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::BR_CC, {MVT::i1, MVT::i32, MVT::i64, MVT::f32, MVT::f64}, Expand); - setOperationAction({ISD::UADDO, ISD::USUBO}, MVT::i32, Legal); + setOperationAction({ISD::ABS, ISD::UADDO, ISD::USUBO}, MVT::i32, Legal); setOperationAction({ISD::UADDO_CARRY, ISD::USUBO_CARRY}, MVT::i32, Legal); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 2ff2d2f..d930a21 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -10628,6 +10628,59 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue)) return false; + const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI, + this]() -> bool { + if (CmpValue != 0) + return false; + + MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg); + if (!Def || Def->getParent() != CmpInstr.getParent()) + return false; + + const auto foldableSelect = [](MachineInstr *Def) -> bool { + if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 || + Def->getOpcode() == AMDGPU::S_CSELECT_B64) { + bool Op1IsNonZeroImm = + Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0; + bool Op2IsZeroImm = + Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0; + if (Op1IsNonZeroImm && Op2IsZeroImm) + return true; + } + return false; + }; + + // For S_OP that set SCC = DST!=0, do the transformation + // + // s_cmp_lg_* (S_OP ...), 0 => (S_OP ...) 
+ + // If foldableSelect, s_cmp_lg_* is redundant because the SCC input value + // for S_CSELECT* already has the same value that will be calculated by + // s_cmp_lg_* + // + // s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero + // imm), 0) + if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def)) + return false; + + MachineInstr *KillsSCC = nullptr; + for (MachineInstr &MI : + make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) { + if (MI.modifiesRegister(AMDGPU::SCC, &RI)) + return false; + if (MI.killsRegister(AMDGPU::SCC, &RI)) + KillsSCC = &MI; + } + + if (MachineOperand *SccDef = + Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr)) + SccDef->setIsDead(false); + if (KillsSCC) + KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr); + CmpInstr.eraseFromParent(); + return true; + }; + const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, this](int64_t ExpectedValue, unsigned SrcSize, bool IsReversible, bool IsSigned) -> bool { @@ -10702,16 +10755,20 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg)) return false; - for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator(); - I != E; ++I) { - if (I->modifiesRegister(AMDGPU::SCC, &RI) || - I->killsRegister(AMDGPU::SCC, &RI)) + MachineInstr *KillsSCC = nullptr; + for (MachineInstr &MI : + make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) { + if (MI.modifiesRegister(AMDGPU::SCC, &RI)) return false; + if (MI.killsRegister(AMDGPU::SCC, &RI)) + KillsSCC = &MI; } MachineOperand *SccDef = Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr); SccDef->setIsDead(false); + if (KillsSCC) + KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr); CmpInstr.eraseFromParent(); if (!MRI->use_nodbg_empty(DefReg)) { @@ -10755,7 +10812,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, case AMDGPU::S_CMP_LG_I32: case 
AMDGPU::S_CMPK_LG_U32: case AMDGPU::S_CMPK_LG_I32: - return optimizeCmpAnd(0, 32, true, false); + return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect(); case AMDGPU::S_CMP_GT_U32: case AMDGPU::S_CMPK_GT_U32: return optimizeCmpAnd(0, 32, false, false); @@ -10763,7 +10820,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, case AMDGPU::S_CMPK_GT_I32: return optimizeCmpAnd(0, 32, false, true); case AMDGPU::S_CMP_LG_U64: - return optimizeCmpAnd(0, 64, true, false); + return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect(); } return false; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index e1d7a07..5fdedda 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -714,6 +714,52 @@ public: } } + static bool setsSCCifResultIsNonZero(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case AMDGPU::S_ABSDIFF_I32: + case AMDGPU::S_ABS_I32: + case AMDGPU::S_AND_B32: + case AMDGPU::S_AND_B64: + case AMDGPU::S_ANDN2_B32: + case AMDGPU::S_ANDN2_B64: + case AMDGPU::S_ASHR_I32: + case AMDGPU::S_ASHR_I64: + case AMDGPU::S_BCNT0_I32_B32: + case AMDGPU::S_BCNT0_I32_B64: + case AMDGPU::S_BCNT1_I32_B32: + case AMDGPU::S_BCNT1_I32_B64: + case AMDGPU::S_BFE_I32: + case AMDGPU::S_BFE_I64: + case AMDGPU::S_BFE_U32: + case AMDGPU::S_BFE_U64: + case AMDGPU::S_LSHL_B32: + case AMDGPU::S_LSHL_B64: + case AMDGPU::S_LSHR_B32: + case AMDGPU::S_LSHR_B64: + case AMDGPU::S_NAND_B32: + case AMDGPU::S_NAND_B64: + case AMDGPU::S_NOR_B32: + case AMDGPU::S_NOR_B64: + case AMDGPU::S_NOT_B32: + case AMDGPU::S_NOT_B64: + case AMDGPU::S_OR_B32: + case AMDGPU::S_OR_B64: + case AMDGPU::S_ORN2_B32: + case AMDGPU::S_ORN2_B64: + case AMDGPU::S_QUADMASK_B32: + case AMDGPU::S_QUADMASK_B64: + case AMDGPU::S_WQM_B32: + case AMDGPU::S_WQM_B64: + case AMDGPU::S_XNOR_B32: + case AMDGPU::S_XNOR_B64: + case AMDGPU::S_XOR_B32: + case AMDGPU::S_XOR_B64: + return true; + default: + return 
false; + } + } + static bool isEXP(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::EXP; } diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 7cce033..05ba76a 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -775,10 +775,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in { } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in { - defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; - defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; - defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; - defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; + defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_max_i32>; + defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_max_u32>; + defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_min_i32>; + defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>, int_amdgcn_add_min_u32>; } defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>; @@ -976,10 +976,10 @@ def : GCNPat < } // End SubtargetPredicate = HasLshlAddU64Inst let SubtargetPredicate = HasAddMinMaxInsts in { -def : ThreeOp_i32_Pats<add, smax, V_ADD_MAX_I32_e64>; -def : ThreeOp_i32_Pats<add, umax, V_ADD_MAX_U32_e64>; -def : ThreeOp_i32_Pats<add, smin, V_ADD_MIN_I32_e64>; -def : ThreeOp_i32_Pats<add, umin, V_ADD_MIN_U32_e64>; +def : ThreeOp_i32_Pats<saddsat, smax, V_ADD_MAX_I32_e64>; +def : ThreeOp_i32_Pats<uaddsat, umax, V_ADD_MAX_U32_e64>; 
+def : ThreeOp_i32_Pats<saddsat, smin, V_ADD_MIN_I32_e64>; +def : ThreeOp_i32_Pats<uaddsat, umin, V_ADD_MIN_U32_e64>; } def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 6500fce..4ae2c1e 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -75,7 +75,7 @@ multiclass VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit IsDOT = 0> { def NAME : VOP3P_Pseudo<OpName, P, !if (P.HasModifiers, - getVOP3PModPat<P, node, IsDOT, IsDOT>.ret, + getVOP3PModPat<P, node, !or(P.EnableClamp, IsDOT), IsDOT>.ret, getVOP3Pat<P, node>.ret)>; let SubtargetPredicate = isGFX11Plus in { if P.HasExtVOP3DPP then @@ -434,15 +434,16 @@ defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_BF16_t16>; } // End SubtargetPredicate = HasFmaMixBF16Insts def PK_ADD_MINMAX_Profile : VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16, VOP3_PACKED> { - let HasModifiers = 0; + let HasNeg = 0; + let EnableClamp = 1; } let isCommutable = 1, isReMaterializable = 1 in { let SubtargetPredicate = HasPkAddMinMaxInsts in { -defm V_PK_ADD_MAX_I16 : VOP3PInst<"v_pk_add_max_i16", PK_ADD_MINMAX_Profile>; -defm V_PK_ADD_MAX_U16 : VOP3PInst<"v_pk_add_max_u16", PK_ADD_MINMAX_Profile>; -defm V_PK_ADD_MIN_I16 : VOP3PInst<"v_pk_add_min_i16", PK_ADD_MINMAX_Profile>; -defm V_PK_ADD_MIN_U16 : VOP3PInst<"v_pk_add_min_u16", PK_ADD_MINMAX_Profile>; +defm V_PK_ADD_MAX_I16 : VOP3PInst<"v_pk_add_max_i16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_max_i16>; +defm V_PK_ADD_MAX_U16 : VOP3PInst<"v_pk_add_max_u16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_max_u16>; +defm V_PK_ADD_MIN_I16 : VOP3PInst<"v_pk_add_min_i16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_min_i16>; +defm V_PK_ADD_MIN_U16 : VOP3PInst<"v_pk_add_min_u16", PK_ADD_MINMAX_Profile, int_amdgcn_pk_add_min_u16>; } let SubtargetPredicate = HasPkMinMax3Insts in { defm V_PK_MAX3_I16 : 
VOP3PInst<"v_pk_max3_i16", PK_ADD_MINMAX_Profile>; @@ -463,10 +464,10 @@ class ThreeOp_OpSelClampPats <SDPatternOperator op1, SDPatternOperator op2, >; let SubtargetPredicate = HasPkAddMinMaxInsts in { -def : ThreeOp_OpSelClampPats<add, smax, V_PK_ADD_MAX_I16>; -def : ThreeOp_OpSelClampPats<add, umax, V_PK_ADD_MAX_U16>; -def : ThreeOp_OpSelClampPats<add, smin, V_PK_ADD_MIN_I16>; -def : ThreeOp_OpSelClampPats<add, umin, V_PK_ADD_MIN_U16>; +def : ThreeOp_OpSelClampPats<saddsat, smax, V_PK_ADD_MAX_I16>; +def : ThreeOp_OpSelClampPats<uaddsat, umax, V_PK_ADD_MAX_U16>; +def : ThreeOp_OpSelClampPats<saddsat, smin, V_PK_ADD_MIN_I16>; +def : ThreeOp_OpSelClampPats<uaddsat, umin, V_PK_ADD_MIN_U16>; } let SubtargetPredicate = HasPkMinMax3Insts in { diff --git a/llvm/lib/Target/ARM/ARMArchitectures.td b/llvm/lib/Target/ARM/ARMArchitectures.td index 301ed5b..bfcecfe 100644 --- a/llvm/lib/Target/ARM/ARMArchitectures.td +++ b/llvm/lib/Target/ARM/ARMArchitectures.td @@ -297,6 +297,18 @@ def ARMv96a : Architecture<"armv9.6-a", "ARMv96a", [HasV9_6aOps, FeatureCRC, FeatureRAS, FeatureDotProd]>; +def ARMv97a : Architecture<"armv9.7-a", "ARMv97a", [HasV9_7aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, FeatureRClass, FeatureDB, diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 3368a50..36b9908 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1471,6 +1471,435 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { // instructions) auto-generated. #include "ARMGenMCPseudoLowering.inc" +// Helper function to check if a register is live (used as an implicit operand) +// in the given call instruction. 
+static bool isRegisterLiveInCall(const MachineInstr &Call, MCRegister Reg) { + for (const MachineOperand &MO : Call.implicit_operands()) { + if (MO.isReg() && MO.getReg() == Reg && MO.isUse()) { + return true; + } + } + return false; +} + +void ARMAsmPrinter::EmitKCFI_CHECK_ARM32(Register AddrReg, int64_t Type, + const MachineInstr &Call, + int64_t PrefixNops) { + // Choose scratch register: r12 primary, r3 if target is r12. + unsigned ScratchReg = ARM::R12; + if (AddrReg == ARM::R12) { + ScratchReg = ARM::R3; + } + + // Calculate ESR for ARM mode (16-bit): 0x8000 | (scratch_reg << 5) | addr_reg + // Note: scratch_reg is always 0x1F since the EOR sequence clobbers it. + const ARMBaseRegisterInfo *TRI = static_cast<const ARMBaseRegisterInfo *>( + MF->getSubtarget().getRegisterInfo()); + unsigned AddrIndex = TRI->getEncodingValue(AddrReg); + unsigned ESR = 0x8000 | (31 << 5) | (AddrIndex & 31); + + // Check if r3 is live and needs to be spilled. + bool NeedSpillR3 = + (ScratchReg == ARM::R3) && isRegisterLiveInCall(Call, ARM::R3); + + // If we need to spill r3, push it first. + if (NeedSpillR3) { + // push {r3} + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::STMDB_UPD) + .addReg(ARM::SP) + .addReg(ARM::SP) + .addImm(ARMCC::AL) + .addReg(0) + .addReg(ARM::R3)); + } + + // Clear bit 0 of target address to handle Thumb function pointers. + // In 32-bit ARM, function pointers may have the low bit set to indicate + // Thumb state when ARM/Thumb interworking is enabled (ARMv4T and later). + // We need to clear it to avoid an alignment fault when loading. 
+ // bic scratch, target, #1 + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BICri) + .addReg(ScratchReg) + .addReg(AddrReg) + .addImm(1) + .addImm(ARMCC::AL) + .addReg(0) + .addReg(0)); + + // ldr scratch, [scratch, #-(PrefixNops * 4 + 4)] + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12) + .addReg(ScratchReg) + .addReg(ScratchReg) + .addImm(-(PrefixNops * 4 + 4)) + .addImm(ARMCC::AL) + .addReg(0)); + + // Each EOR instruction XORs one byte of the type, shifted to its position. + for (int i = 0; i < 4; i++) { + uint8_t byte = (Type >> (i * 8)) & 0xFF; + uint32_t imm = byte << (i * 8); + bool isLast = (i == 3); + + // Encode as ARM modified immediate. + int SOImmVal = ARM_AM::getSOImmVal(imm); + assert(SOImmVal != -1 && + "Cannot encode immediate as ARM modified immediate"); + + // eor[s] scratch, scratch, #imm (last one sets flags with CPSR) + EmitToStreamer(*OutStreamer, + MCInstBuilder(ARM::EORri) + .addReg(ScratchReg) + .addReg(ScratchReg) + .addImm(SOImmVal) + .addImm(ARMCC::AL) + .addReg(0) + .addReg(isLast ? ARM::CPSR : ARM::NoRegister)); + } + + // If we spilled r3, restore it immediately after the comparison. + // This must happen before the branch so r3 is valid on both paths. 
+ if (NeedSpillR3) { + // pop {r3} + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDMIA_UPD) + .addReg(ARM::SP) + .addReg(ARM::SP) + .addImm(ARMCC::AL) + .addReg(0) + .addReg(ARM::R3)); + } + + // beq .Lpass (branch if types match, i.e., scratch is zero) + MCSymbol *Pass = OutContext.createTempSymbol(); + EmitToStreamer(*OutStreamer, + MCInstBuilder(ARM::Bcc) + .addExpr(MCSymbolRefExpr::create(Pass, OutContext)) + .addImm(ARMCC::EQ) + .addReg(ARM::CPSR)); + + // udf #ESR (trap with encoded diagnostic) + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::UDF).addImm(ESR)); + + OutStreamer->emitLabel(Pass); +} + +void ARMAsmPrinter::EmitKCFI_CHECK_Thumb2(Register AddrReg, int64_t Type, + const MachineInstr &Call, + int64_t PrefixNops) { + // Choose scratch register: r12 primary, r3 if target is r12. + unsigned ScratchReg = ARM::R12; + if (AddrReg == ARM::R12) { + ScratchReg = ARM::R3; + } + + // Calculate ESR for Thumb mode (8-bit): 0x80 | addr_reg + // Bit 7: KCFI trap indicator + // Bits 6-5: Reserved + // Bits 4-0: Address register encoding + const ARMBaseRegisterInfo *TRI = static_cast<const ARMBaseRegisterInfo *>( + MF->getSubtarget().getRegisterInfo()); + unsigned AddrIndex = TRI->getEncodingValue(AddrReg); + unsigned ESR = 0x80 | (AddrIndex & 0x1F); + + // Check if r3 is live and needs to be spilled. + bool NeedSpillR3 = + (ScratchReg == ARM::R3) && isRegisterLiveInCall(Call, ARM::R3); + + // If we need to spill r3, push it first. + if (NeedSpillR3) { + // push {r3} + EmitToStreamer( + *OutStreamer, + MCInstBuilder(ARM::tPUSH).addImm(ARMCC::AL).addReg(0).addReg(ARM::R3)); + } + + // Clear bit 0 of target address to handle Thumb function pointers. + // In 32-bit ARM, function pointers may have the low bit set to indicate + // Thumb state when ARM/Thumb interworking is enabled (ARMv4T and later). + // We need to clear it to avoid an alignment fault when loading. 
+ // bic scratch, target, #1 + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2BICri) + .addReg(ScratchReg) + .addReg(AddrReg) + .addImm(1) + .addImm(ARMCC::AL) + .addReg(0) + .addReg(0)); + + // ldr scratch, [scratch, #-(PrefixNops * 4 + 4)] + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2LDRi8) + .addReg(ScratchReg) + .addReg(ScratchReg) + .addImm(-(PrefixNops * 4 + 4)) + .addImm(ARMCC::AL) + .addReg(0)); + + // Each EOR instruction XORs one byte of the type, shifted to its position. + for (int i = 0; i < 4; i++) { + uint8_t byte = (Type >> (i * 8)) & 0xFF; + uint32_t imm = byte << (i * 8); + bool isLast = (i == 3); + + // Verify the immediate can be encoded as Thumb2 modified immediate. + assert(ARM_AM::getT2SOImmVal(imm) != -1 && + "Cannot encode immediate as Thumb2 modified immediate"); + + // eor[s] scratch, scratch, #imm (last one sets flags with CPSR) + EmitToStreamer(*OutStreamer, + MCInstBuilder(ARM::t2EORri) + .addReg(ScratchReg) + .addReg(ScratchReg) + .addImm(imm) + .addImm(ARMCC::AL) + .addReg(0) + .addReg(isLast ? ARM::CPSR : ARM::NoRegister)); + } + + // If we spilled r3, restore it immediately after the comparison. + // This must happen before the branch so r3 is valid on both paths. 
+ if (NeedSpillR3) { + // pop {r3} + EmitToStreamer( + *OutStreamer, + MCInstBuilder(ARM::tPOP).addImm(ARMCC::AL).addReg(0).addReg(ARM::R3)); + } + + // beq .Lpass (branch if types match, i.e., scratch is zero) + MCSymbol *Pass = OutContext.createTempSymbol(); + EmitToStreamer(*OutStreamer, + MCInstBuilder(ARM::t2Bcc) + .addExpr(MCSymbolRefExpr::create(Pass, OutContext)) + .addImm(ARMCC::EQ) + .addReg(ARM::CPSR)); + + // udf #ESR (trap with encoded diagnostic) + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tUDF).addImm(ESR)); + + OutStreamer->emitLabel(Pass); +} + +void ARMAsmPrinter::EmitKCFI_CHECK_Thumb1(Register AddrReg, int64_t Type, + const MachineInstr &Call, + int64_t PrefixNops) { + // For Thumb1, use R2 unconditionally as scratch register (a low register + // required for tLDRi). R3 is used for building the type hash. + unsigned ScratchReg = ARM::R2; + unsigned TempReg = ARM::R3; + + // Check if r3 is live and needs to be spilled. + bool NeedSpillR3 = isRegisterLiveInCall(Call, ARM::R3); + + // Spill r3 if needed + if (NeedSpillR3) { + EmitToStreamer( + *OutStreamer, + MCInstBuilder(ARM::tPUSH).addImm(ARMCC::AL).addReg(0).addReg(ARM::R3)); + } + + // Check if r2 is live and needs to be spilled. + bool NeedSpillR2 = isRegisterLiveInCall(Call, ARM::R2); + + // Push R2 if it's live + if (NeedSpillR2) { + EmitToStreamer( + *OutStreamer, + MCInstBuilder(ARM::tPUSH).addImm(ARMCC::AL).addReg(0).addReg(ARM::R2)); + } + + // Clear bit 0 from target address + // TempReg (R3) is used first as helper for BIC, then later for building type + // hash. 
+ + // movs temp, #1 + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVi8) + .addReg(TempReg) + .addReg(ARM::CPSR) + .addImm(1) + .addImm(ARMCC::AL) + .addReg(0)); + + // mov scratch, target + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr) + .addReg(ScratchReg) + .addReg(AddrReg) + .addImm(ARMCC::AL)); + + // bics scratch, temp (scratch = scratch & ~temp) + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBIC) + .addReg(ScratchReg) + .addReg(ARM::CPSR) + .addReg(ScratchReg) + .addReg(TempReg) + .addImm(ARMCC::AL) + .addReg(0)); + + // Load type hash. Thumb1 doesn't support negative offsets, so subtract. + int offset = PrefixNops * 4 + 4; + + // subs scratch, #offset + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tSUBi8) + .addReg(ScratchReg) + .addReg(ARM::CPSR) + .addReg(ScratchReg) + .addImm(offset) + .addImm(ARMCC::AL) + .addReg(0)); + + // ldr scratch, [scratch, #0] + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi) + .addReg(ScratchReg) + .addReg(ScratchReg) + .addImm(0) + .addImm(ARMCC::AL) + .addReg(0)); + + // Load expected type inline (instead of EOR sequence) + // + // This creates the 32-bit value byte-by-byte in the temp register: + // movs temp, #byte3 (high byte) + // lsls temp, temp, #8 + // adds temp, #byte2 + // lsls temp, temp, #8 + // adds temp, #byte1 + // lsls temp, temp, #8 + // adds temp, #byte0 (low byte) + + uint8_t byte0 = (Type >> 0) & 0xFF; + uint8_t byte1 = (Type >> 8) & 0xFF; + uint8_t byte2 = (Type >> 16) & 0xFF; + uint8_t byte3 = (Type >> 24) & 0xFF; + + // movs temp, #byte3 (start with high byte) + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVi8) + .addReg(TempReg) + .addReg(ARM::CPSR) + .addImm(byte3) + .addImm(ARMCC::AL) + .addReg(0)); + + // lsls temp, temp, #8 + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri) + .addReg(TempReg) + .addReg(ARM::CPSR) + .addReg(TempReg) + .addImm(8) + .addImm(ARMCC::AL) + .addReg(0)); + + // adds temp, #byte2 + EmitToStreamer(*OutStreamer, 
MCInstBuilder(ARM::tADDi8) + .addReg(TempReg) + .addReg(ARM::CPSR) + .addReg(TempReg) + .addImm(byte2) + .addImm(ARMCC::AL) + .addReg(0)); + + // lsls temp, temp, #8 + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri) + .addReg(TempReg) + .addReg(ARM::CPSR) + .addReg(TempReg) + .addImm(8) + .addImm(ARMCC::AL) + .addReg(0)); + + // adds temp, #byte1 + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDi8) + .addReg(TempReg) + .addReg(ARM::CPSR) + .addReg(TempReg) + .addImm(byte1) + .addImm(ARMCC::AL) + .addReg(0)); + + // lsls temp, temp, #8 + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri) + .addReg(TempReg) + .addReg(ARM::CPSR) + .addReg(TempReg) + .addImm(8) + .addImm(ARMCC::AL) + .addReg(0)); + + // adds temp, #byte0 (low byte) + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDi8) + .addReg(TempReg) + .addReg(ARM::CPSR) + .addReg(TempReg) + .addImm(byte0) + .addImm(ARMCC::AL) + .addReg(0)); + + // cmp scratch, temp + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tCMPr) + .addReg(ScratchReg) + .addReg(TempReg) + .addImm(ARMCC::AL) + .addReg(0)); + + // Restore registers if spilled (pop in reverse order of push: R2, then R3) + if (NeedSpillR2) { + // pop {r2} + EmitToStreamer( + *OutStreamer, + MCInstBuilder(ARM::tPOP).addImm(ARMCC::AL).addReg(0).addReg(ARM::R2)); + } + + // Restore r3 if spilled + if (NeedSpillR3) { + // pop {r3} + EmitToStreamer( + *OutStreamer, + MCInstBuilder(ARM::tPOP).addImm(ARMCC::AL).addReg(0).addReg(ARM::R3)); + } + + // beq .Lpass (branch if types match, i.e., scratch == temp) + MCSymbol *Pass = OutContext.createTempSymbol(); + EmitToStreamer(*OutStreamer, + MCInstBuilder(ARM::tBcc) + .addExpr(MCSymbolRefExpr::create(Pass, OutContext)) + .addImm(ARMCC::EQ) + .addReg(ARM::CPSR)); + + // bkpt #0 (trap with encoded diagnostic) + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBKPT).addImm(0)); + + OutStreamer->emitLabel(Pass); +} + +void ARMAsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) { + Register AddrReg = 
MI.getOperand(0).getReg(); + const int64_t Type = MI.getOperand(1).getImm(); + + // Get the call instruction that follows this KCFI_CHECK. + assert(std::next(MI.getIterator())->isCall() && + "KCFI_CHECK not followed by a call instruction"); + const MachineInstr &Call = *std::next(MI.getIterator()); + + // Adjust the offset for patchable-function-prefix. + int64_t PrefixNops = 0; + MI.getMF() + ->getFunction() + .getFnAttribute("patchable-function-prefix") + .getValueAsString() + .getAsInteger(10, PrefixNops); + + // Emit the appropriate instruction sequence based on the opcode variant. + switch (MI.getOpcode()) { + case ARM::KCFI_CHECK_ARM: + EmitKCFI_CHECK_ARM32(AddrReg, Type, Call, PrefixNops); + break; + case ARM::KCFI_CHECK_Thumb2: + EmitKCFI_CHECK_Thumb2(AddrReg, Type, Call, PrefixNops); + break; + case ARM::KCFI_CHECK_Thumb1: + EmitKCFI_CHECK_Thumb1(AddrReg, Type, Call, PrefixNops); + break; + default: + llvm_unreachable("Unexpected KCFI_CHECK opcode"); + } +} + void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) { ARM_MC::verifyInstructionPredicates(MI->getOpcode(), getSubtargetInfo().getFeatureBits()); @@ -1504,6 +1933,11 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) { switch (Opc) { case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass"); case ARM::DBG_VALUE: llvm_unreachable("Should be handled by generic printing"); + case ARM::KCFI_CHECK_ARM: + case ARM::KCFI_CHECK_Thumb2: + case ARM::KCFI_CHECK_Thumb1: + LowerKCFI_CHECK(*MI); + return; case ARM::LEApcrel: case ARM::tLEApcrel: case ARM::t2LEApcrel: { diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.h b/llvm/lib/Target/ARM/ARMAsmPrinter.h index 2b067c7..9e92b5a 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.h +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.h @@ -123,9 +123,20 @@ public: void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); + // KCFI check lowering + void LowerKCFI_CHECK(const MachineInstr 
&MI); + private: void EmitSled(const MachineInstr &MI, SledKind Kind); + // KCFI check emission helpers + void EmitKCFI_CHECK_ARM32(Register AddrReg, int64_t Type, + const MachineInstr &Call, int64_t PrefixNops); + void EmitKCFI_CHECK_Thumb2(Register AddrReg, int64_t Type, + const MachineInstr &Call, int64_t PrefixNops); + void EmitKCFI_CHECK_Thumb1(Register AddrReg, int64_t Type, + const MachineInstr &Call, int64_t PrefixNops); + // Helpers for emitStartOfAsmFile() and emitEndOfAsmFile() void emitAttributes(); diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 0d7b6d1..fffb6373 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -2301,6 +2301,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) NewMI->addOperand(MBBI->getOperand(i)); + NewMI->setCFIType(*MBB.getParent(), MI.getCFIType()); + // Update call info and delete the pseudo instruction TCRETURN. if (MI.isCandidateForAdditionalCallInfo()) MI.getMF()->moveAdditionalCallInfo(&MI, &*NewMI); diff --git a/llvm/lib/Target/ARM/ARMFeatures.td b/llvm/lib/Target/ARM/ARMFeatures.td index 9b1fa5d..e562b21 100644 --- a/llvm/lib/Target/ARM/ARMFeatures.td +++ b/llvm/lib/Target/ARM/ARMFeatures.td @@ -712,6 +712,11 @@ def HasV9_6aOps : SubtargetFeature<"v9.6a", "HasV9_6aOps", "true", "Support ARM v9.6a instructions", [HasV9_5aOps]>; +// Armv9.7-A is a v9-only architecture. 
+def HasV9_7aOps : SubtargetFeature<"v9.7a", "HasV9_7aOps", "true", + "Support ARM v9.7a instructions", + [HasV9_6aOps]>; + def HasV8_1MMainlineOps : SubtargetFeature< "v8.1m.main", "HasV8_1MMainlineOps", "true", "Support ARM v8-1M Mainline instructions", diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index b1a668e..313ae3d 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2849,6 +2849,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isTailCall) { MF.getFrameInfo().setHasTailCall(); SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, MVT::Other, Ops); + if (CLI.CFIType) + Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); return Ret; @@ -2856,6 +2858,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Returns a chain and a flag for retval copy to use. 
Chain = DAG.getNode(CallOpc, dl, {MVT::Other, MVT::Glue}, Ops); + if (CLI.CFIType) + Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); InGlue = Chain.getValue(1); DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); @@ -12008,6 +12012,71 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody, .add(predOps(ARMCC::AL)); } +bool ARMTargetLowering::supportKCFIBundles() const { + // KCFI is supported in all ARM/Thumb modes + return true; +} + +MachineInstr * +ARMTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator &MBBI, + const TargetInstrInfo *TII) const { + assert(MBBI->isCall() && MBBI->getCFIType() && + "Invalid call instruction for a KCFI check"); + + MachineOperand *TargetOp = nullptr; + switch (MBBI->getOpcode()) { + // ARM mode opcodes + case ARM::BLX: + case ARM::BLX_pred: + case ARM::BLX_noip: + case ARM::BLX_pred_noip: + case ARM::BX_CALL: + TargetOp = &MBBI->getOperand(0); + break; + case ARM::TCRETURNri: + case ARM::TCRETURNrinotr12: + case ARM::TAILJMPr: + case ARM::TAILJMPr4: + TargetOp = &MBBI->getOperand(0); + break; + // Thumb mode opcodes (Thumb1 and Thumb2) + // Note: Most Thumb call instructions have predicate operands before the + // target register Format: tBLXr pred, predreg, target_register, ... 
+ case ARM::tBLXr: // Thumb1/Thumb2: BLX register (requires V5T) + case ARM::tBLXr_noip: // Thumb1/Thumb2: BLX register, no IP clobber + case ARM::tBX_CALL: // Thumb1 only: BX call (push LR, BX) + TargetOp = &MBBI->getOperand(2); + break; + // Tail call instructions don't have predicates, target is operand 0 + case ARM::tTAILJMPr: // Thumb1/Thumb2: Tail call via register + TargetOp = &MBBI->getOperand(0); + break; + default: + llvm_unreachable("Unexpected CFI call opcode"); + } + + assert(TargetOp && TargetOp->isReg() && "Invalid target operand"); + TargetOp->setIsRenamable(false); + + // Select the appropriate KCFI_CHECK variant based on the instruction set + unsigned KCFICheckOpcode; + if (Subtarget->isThumb()) { + if (Subtarget->isThumb2()) { + KCFICheckOpcode = ARM::KCFI_CHECK_Thumb2; + } else { + KCFICheckOpcode = ARM::KCFI_CHECK_Thumb1; + } + } else { + KCFICheckOpcode = ARM::KCFI_CHECK_ARM; + } + + return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(KCFICheckOpcode)) + .addReg(TargetOp->getReg()) + .addImm(MBBI->getCFIType()) + .getInstr(); +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { @@ -21312,15 +21381,6 @@ void ARMTargetLowering::insertSSPDeclarations(Module &M) const { TargetLowering::insertSSPDeclarations(M); } -Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const { - // MSVC CRT has a function to validate security cookie. - RTLIB::LibcallImpl SecurityCheckCookie = - getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE); - if (SecurityCheckCookie != RTLIB::Unsupported) - return M.getFunction(getLibcallImplName(SecurityCheckCookie)); - return TargetLowering::getSSPStackGuardCheck(M); -} - bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const { // If we do not have NEON, vector types are not natively supported. 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 70aa001..357d2c5 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -447,6 +447,12 @@ class VectorType; void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override; + bool supportKCFIBundles() const override; + + MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator &MBBI, + const TargetInstrInfo *TII) const override; + SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const; SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const; SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const; @@ -702,7 +708,6 @@ class VectorType; bool useLoadStackGuardNode(const Module &M) const override; void insertSSPDeclarations(Module &M) const override; - Function *getSSPStackGuardCheck(const Module &M) const override; bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 282ff53..10d4cd5 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -6536,6 +6536,38 @@ def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPRPair:$addr_temp_out), def : Pat<(atomic_fence (timm), 0), (MEMBARRIER)>; //===----------------------------------------------------------------------===// +// KCFI check pseudo-instruction. +//===----------------------------------------------------------------------===// +// KCFI_CHECK pseudo-instruction for Kernel Control-Flow Integrity. +// Expands to a sequence that verifies the function pointer's type hash. +// Different sizes for different architectures due to different expansions. 
+ +def KCFI_CHECK_ARM + : PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>, + Sched<[]>, + Requires<[IsARM]> { + let Size = 40; // worst-case 10 instructions @ 4 bytes each + // (push, bic, ldr, 4x eor, pop, beq, udf) +} + +def KCFI_CHECK_Thumb2 + : PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>, + Sched<[]>, + Requires<[IsThumb2]> { + let Size = 34; // worst-case (push.w[2], bic[4], ldr[4], 4x eor[16], pop.w[2], + // beq.w[4], udf[2]) +} + +def KCFI_CHECK_Thumb1 + : PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>, + Sched<[]>, + Requires<[IsThumb1Only]> { + let Size = 38; // worst-case 19 instructions @ 2 bytes each + // (2x push, 3x bic-helper, subs+ldr, 13x type-building, cmp, + // 2x pop, beq, bkpt) +} + +//===----------------------------------------------------------------------===// // Instructions used for emitting unwind opcodes on Windows. //===----------------------------------------------------------------------===// let isPseudo = 1 in { diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 86740a9..590d4c7 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -111,6 +111,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() { initializeMVELaneInterleavingPass(Registry); initializeARMFixCortexA57AES1742098Pass(Registry); initializeARMDAGToDAGISelLegacyPass(Registry); + initializeKCFIPass(Registry); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { @@ -487,6 +488,9 @@ void ARMPassConfig::addPreSched2() { // proper scheduling. addPass(createARMExpandPseudoPass()); + // Emit KCFI checks for indirect calls. + addPass(createKCFIPass()); + if (getOptLevel() != CodeGenOptLevel::None) { // When optimising for size, always run the Thumb2SizeReduction pass before // IfConversion. 
Otherwise, check whether IT blocks are restricted @@ -517,9 +521,12 @@ void ARMPassConfig::addPreSched2() { void ARMPassConfig::addPreEmitPass() { addPass(createThumb2SizeReductionPass()); - // Constant island pass work on unbundled instructions. + // Unpack bundles for: + // - Thumb2: Constant island pass requires unbundled instructions + // - KCFI: KCFI_CHECK pseudo instructions need to be unbundled for AsmPrinter addPass(createUnpackMachineBundles([](const MachineFunction &MF) { - return MF.getSubtarget<ARMSubtarget>().isThumb2(); + return MF.getSubtarget<ARMSubtarget>().isThumb2() || + MF.getFunction().getParent()->getModuleFlag("kcfi"); })); // Don't optimize barriers or block placement at -O0. @@ -530,6 +537,7 @@ void ARMPassConfig::addPreEmitPass() { } void ARMPassConfig::addPreEmitPass2() { + // Inserts fixup instructions before unsafe AES operations. Instructions may // be inserted at the start of blocks and at within blocks so this pass has to // come before those below. diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 0796746..94b511a 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -895,6 +895,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { case ARM::ArchKind::ARMV9_4A: case ARM::ArchKind::ARMV9_5A: case ARM::ArchKind::ARMV9_6A: + case ARM::ArchKind::ARMV9_7A: S.setAttributeItem(CPU_arch_profile, ApplicationProfile, false); S.setAttributeItem(ARM_ISA_use, Allowed, false); S.setAttributeItem(THUMB_ISA_use, AllowThumb32, false); diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index 02fb905..4a2f714 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -1504,14 +1504,26 @@ let Defs = [SREG], hasSideEffects = 0 in def FRMIDX : Pseudo<(outs DLDREGS:$dst), (ins DLDREGS:$src, i16imm:$src2), "frmidx\t$dst, $src, $src2", 
[]>; +// The instructions STDSPQRr and STDWSPQRr are used to store to the stack +// frame. The most accurate implementation would be to load the SP into +// a temporary pointer variable and then STDPtrQRr. However for efficiency, +// we assume that R29R28 contains the current call frame pointer. +// However in the PEI pass we sometimes rewrite a ADJCALLSTACKDOWN pseudo, +// plus one or more STDSPQRr/STDWSPQRr pseudo instructions to use Z for a +// stack adjustment then as a base pointer. To avoid corruption, we thus +// specify special classes of registers, like GPR8 and DREGS, but with +// the Z register removed, as the source/input to these instructions. // This pseudo is either converted to a regular store or a push which clobbers // SP. -def STDSPQRr : StorePseudo<(outs), (ins memspi:$dst, GPR8:$src), +let Defs = [SP], Uses = [SP], hasSideEffects = 0 in +def STDSPQRr : StorePseudo<(outs), (ins memspi:$dst, GPR8NOZ:$src), "stdstk\t$dst, $src", [(store i8:$src, addr:$dst)]>; +// See the comment on STDSPQRr. // This pseudo is either converted to a regular store or a push which clobbers // SP. -def STDWSPQRr : StorePseudo<(outs), (ins memspi:$dt, DREGS:$src), +let Defs = [SP], Uses = [SP], hasSideEffects = 0 in +def STDWSPQRr : StorePseudo<(outs), (ins memspi:$dt, DREGSNOZ:$src), "stdwstk\t$dt, $src", [(store i16:$src, addr:$dt)]>; // SP read/write pseudos. diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.td b/llvm/lib/Target/AVR/AVRRegisterInfo.td index 182f92c..9b935b1 100644 --- a/llvm/lib/Target/AVR/AVRRegisterInfo.td +++ b/llvm/lib/Target/AVR/AVRRegisterInfo.td @@ -211,6 +211,31 @@ def PTRDISPREGS : RegisterClass<"AVR", [i16], 8, (add R31R30, R29R28), ptr>; // model this using a register class containing only the Z register. 
def ZREG : RegisterClass<"AVR", [i16], 8, (add R31R30)>; +// general registers excluding Z register lo/hi, these are the only +// registers that are always safe for STDSPQr instructions +def GPR8NOZ : RegisterClass<"AVR", [i8], 8, + (// Return value and argument registers. + add R24, R25, R18, R19, R20, R21, R22, R23, + // Scratch registers. + R26, R27, + // Callee saved registers. + R28, R29, R17, R16, R15, R14, R13, R12, R11, R10, + R9, R8, R7, R6, R5, R4, R3, R2, R0, R1)>; + +// 16-bit pair register class excluding Z register lo/hi, these are the only +// registers that are always safe for STDWSPQr instructions +def DREGSNOZ : RegisterClass<"AVR", [i16], 8, + (// Return value and arguments. + add R25R24, R19R18, R21R20, R23R22, + // Scratch registers. + R27R26, + // Callee saved registers. + R29R28, R17R16, R15R14, R13R12, R11R10, R9R8, + R7R6, R5R4, R3R2, R1R0, + // Pseudo regs for unaligned 16-bits + R26R25, R24R23, R22R21, R20R19, R18R17, R16R15, + R14R13, R12R11, R10R9)>; + // Register class used for the stack read pseudo instruction. def GPRSP : RegisterClass<"AVR", [i16], 8, (add SP)>; diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index 9b5fc9d..a652b7e 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -95,7 +95,24 @@ void BTFTypeDerived::completeType(BTFDebug &BDebug) { return; IsCompleted = true; - BTFType.NameOff = BDebug.addString(Name); + switch (Kind) { + case BTF::BTF_KIND_PTR: + case BTF::BTF_KIND_CONST: + case BTF::BTF_KIND_VOLATILE: + case BTF::BTF_KIND_RESTRICT: + // Debug info might contain names for these types, but given that we want + // to keep BTF minimal and naming reference types doesn't bring any value + // (what matters is the completeness of the base type), we don't emit them. 
+ // + // Furthermore, the Linux kernel refuses to load BPF programs that contain + // BTF with these types named: + // https://elixir.bootlin.com/linux/v6.17.1/source/kernel/bpf/btf.c#L2586 + BTFType.NameOff = 0; + break; + default: + BTFType.NameOff = BDebug.addString(Name); + break; + } if (NeedsFixup || !DTy) return; diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp index 42e90f0..d6fa65f 100644 --- a/llvm/lib/Target/DirectX/DXILPrepare.cpp +++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// /// -/// \file This file contains pases and utilities to convert a modern LLVM +/// \file This file contains passes and utilities to convert a modern LLVM /// module into a module compatible with the LLVM 3.7-based DirectX Intermediate /// Language (DXIL). //===----------------------------------------------------------------------===// @@ -16,7 +16,6 @@ #include "DirectX.h" #include "DirectXIRPasses/PointerTypeAnalysis.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/Analysis/DXILMetadataAnalysis.h" #include "llvm/Analysis/DXILResource.h" @@ -27,7 +26,6 @@ #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/VersionTuple.h" #define DEBUG_TYPE "dxil-prepare" @@ -116,31 +114,6 @@ static void removeStringFunctionAttributes(Function &F, F.removeRetAttrs(DeadAttrs); } -static void cleanModuleFlags(Module &M) { - NamedMDNode *MDFlags = M.getModuleFlagsMetadata(); - if (!MDFlags) - return; - - SmallVector<llvm::Module::ModuleFlagEntry> FlagEntries; - M.getModuleFlagsMetadata(FlagEntries); - bool Updated = false; - for (auto &Flag : FlagEntries) { - // llvm 3.7 only supports behavior up to AppendUnique. 
- if (Flag.Behavior <= Module::ModFlagBehavior::AppendUnique) - continue; - Flag.Behavior = Module::ModFlagBehavior::Warning; - Updated = true; - } - - if (!Updated) - return; - - MDFlags->eraseFromParent(); - - for (auto &Flag : FlagEntries) - M.addModuleFlag(Flag.Behavior, Flag.Key->getString(), Flag.Val); -} - class DXILPrepareModule : public ModulePass { static Value *maybeGenerateBitcast(IRBuilder<> &Builder, @@ -202,15 +175,6 @@ class DXILPrepareModule : public ModulePass { Builder.getPtrTy(PtrTy->getAddressSpace()))); } - static std::array<unsigned, 6> getCompatibleInstructionMDs(llvm::Module &M) { - return {M.getMDKindID("dx.nonuniform"), - M.getMDKindID("dx.controlflow.hints"), - M.getMDKindID("dx.precise"), - llvm::LLVMContext::MD_range, - llvm::LLVMContext::MD_alias_scope, - llvm::LLVMContext::MD_noalias}; - } - public: bool runOnModule(Module &M) override { PointerTypeMap PointerTypes = PointerTypeAnalysis::run(M); @@ -224,10 +188,7 @@ public: const dxil::ModuleMetadataInfo MetadataInfo = getAnalysis<DXILMetadataAnalysisWrapperPass>().getModuleMetadata(); VersionTuple ValVer = MetadataInfo.ValidatorVersion; - bool SkipValidation = ValVer.getMajor() == 0 && ValVer.getMinor() == 0; - - // construct allowlist of valid metadata node kinds - std::array<unsigned, 6> DXILCompatibleMDs = getCompatibleInstructionMDs(M); + bool AllowExperimental = ValVer.getMajor() == 0 && ValVer.getMinor() == 0; for (auto &F : M.functions()) { F.removeFnAttrs(AttrMask); @@ -235,7 +196,7 @@ public: // Only remove string attributes if we are not skipping validation. // This will reserve the experimental attributes when validation version // is 0.0 for experiment mode. 
- removeStringFunctionAttributes(F, SkipValidation); + removeStringFunctionAttributes(F, AllowExperimental); for (size_t Idx = 0, End = F.arg_size(); Idx < End; ++Idx) F.removeParamAttrs(Idx, AttrMask); @@ -243,11 +204,17 @@ public: IRBuilder<> Builder(&BB); for (auto &I : make_early_inc_range(BB)) { - I.dropUnknownNonDebugMetadata(DXILCompatibleMDs); + if (auto *CB = dyn_cast<CallBase>(&I)) { + CB->removeFnAttrs(AttrMask); + CB->removeRetAttrs(AttrMask); + for (size_t Idx = 0, End = CB->arg_size(); Idx < End; ++Idx) + CB->removeParamAttrs(Idx, AttrMask); + continue; + } // Emtting NoOp bitcast instructions allows the ValueEnumerator to be // unmodified as it reserves instruction IDs during contruction. - if (auto LI = dyn_cast<LoadInst>(&I)) { + if (auto *LI = dyn_cast<LoadInst>(&I)) { if (Value *NoOpBitcast = maybeGenerateBitcast( Builder, PointerTypes, I, LI->getPointerOperand(), LI->getType())) { @@ -257,7 +224,7 @@ public: } continue; } - if (auto SI = dyn_cast<StoreInst>(&I)) { + if (auto *SI = dyn_cast<StoreInst>(&I)) { if (Value *NoOpBitcast = maybeGenerateBitcast( Builder, PointerTypes, I, SI->getPointerOperand(), SI->getValueOperand()->getType())) { @@ -268,39 +235,16 @@ public: } continue; } - if (auto GEP = dyn_cast<GetElementPtrInst>(&I)) { + if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { if (Value *NoOpBitcast = maybeGenerateBitcast( Builder, PointerTypes, I, GEP->getPointerOperand(), GEP->getSourceElementType())) GEP->setOperand(0, NoOpBitcast); continue; } - if (auto *CB = dyn_cast<CallBase>(&I)) { - CB->removeFnAttrs(AttrMask); - CB->removeRetAttrs(AttrMask); - for (size_t Idx = 0, End = CB->arg_size(); Idx < End; ++Idx) - CB->removeParamAttrs(Idx, AttrMask); - continue; - } } } } - // Remove flags not for DXIL. 
- cleanModuleFlags(M); - - // dx.rootsignatures will have been parsed from its metadata form as its - // binary form as part of the RootSignatureAnalysisWrapper, so safely - // remove it as it is not recognized in DXIL - if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures")) - RootSignature->eraseFromParent(); - - // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and - // causes all tests using the DXIL Validator to fail. - // - // This is a temporary fix and should be replaced with a whitelist once - // we have determined all metadata that the DXIL Validator allows - if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa")) - ErrNo->eraseFromParent(); return true; } @@ -308,11 +252,11 @@ public: DXILPrepareModule() : ModulePass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<DXILMetadataAnalysisWrapperPass>(); - AU.addRequired<RootSignatureAnalysisWrapper>(); - AU.addPreserved<RootSignatureAnalysisWrapper>(); - AU.addPreserved<ShaderFlagsAnalysisWrapper>(); + AU.addPreserved<DXILMetadataAnalysisWrapperPass>(); AU.addPreserved<DXILResourceWrapperPass>(); + AU.addPreserved<RootSignatureAnalysisWrapper>(); + AU.addPreserved<ShaderFlagsAnalysisWrapper>(); } static char ID; // Pass identification. 
}; @@ -323,7 +267,6 @@ char DXILPrepareModule::ID = 0; INITIALIZE_PASS_BEGIN(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module", false, false) INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass) -INITIALIZE_PASS_DEPENDENCY(RootSignatureAnalysisWrapper) INITIALIZE_PASS_END(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module", false, false) diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp index 9eebcc9..1e4797b 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp @@ -7,8 +7,10 @@ //===----------------------------------------------------------------------===// #include "DXILTranslateMetadata.h" +#include "DXILRootSignature.h" #include "DXILShaderFlags.h" #include "DirectX.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/DXILMetadataAnalysis.h" @@ -204,9 +206,9 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags, return MDNode::get(Ctx, MDVals); } -MDTuple *constructEntryMetadata(const Function *EntryFn, MDTuple *Signatures, - MDNode *Resources, MDTuple *Properties, - LLVMContext &Ctx) { +static MDTuple *constructEntryMetadata(const Function *EntryFn, + MDTuple *Signatures, MDNode *Resources, + MDTuple *Properties, LLVMContext &Ctx) { // Each entry point metadata record specifies: // * reference to the entry point function global symbol // * unmangled name @@ -290,42 +292,82 @@ static MDTuple *emitTopLevelLibraryNode(Module &M, MDNode *RMD, return constructEntryMetadata(nullptr, nullptr, RMD, Properties, Ctx); } -// TODO: We might need to refactor this to be more generic, -// in case we need more metadata to be replaced. 
-static void translateBranchMetadata(Module &M) { - for (Function &F : M) { - for (BasicBlock &BB : F) { - Instruction *BBTerminatorInst = BB.getTerminator(); +static void translateBranchMetadata(Module &M, Instruction *BBTerminatorInst) { + MDNode *HlslControlFlowMD = + BBTerminatorInst->getMetadata("hlsl.controlflow.hint"); + + if (!HlslControlFlowMD) + return; - MDNode *HlslControlFlowMD = - BBTerminatorInst->getMetadata("hlsl.controlflow.hint"); + assert(HlslControlFlowMD->getNumOperands() == 2 && + "invalid operands for hlsl.controlflow.hint"); - if (!HlslControlFlowMD) - continue; + MDBuilder MDHelper(M.getContext()); - assert(HlslControlFlowMD->getNumOperands() == 2 && - "invalid operands for hlsl.controlflow.hint"); + llvm::Metadata *HintsStr = MDHelper.createString("dx.controlflow.hints"); + llvm::Metadata *HintsValue = MDHelper.createConstant( + mdconst::extract<ConstantInt>(HlslControlFlowMD->getOperand(1))); - MDBuilder MDHelper(M.getContext()); - ConstantInt *Op1 = - mdconst::extract<ConstantInt>(HlslControlFlowMD->getOperand(1)); + MDNode *MDNode = llvm::MDNode::get(M.getContext(), {HintsStr, HintsValue}); - SmallVector<llvm::Metadata *, 2> Vals( - ArrayRef<Metadata *>{MDHelper.createString("dx.controlflow.hints"), - MDHelper.createConstant(Op1)}); + BBTerminatorInst->setMetadata("dx.controlflow.hints", MDNode); + BBTerminatorInst->setMetadata("hlsl.controlflow.hint", nullptr); +} + +static std::array<unsigned, 6> getCompatibleInstructionMDs(llvm::Module &M) { + return { + M.getMDKindID("dx.nonuniform"), M.getMDKindID("dx.controlflow.hints"), + M.getMDKindID("dx.precise"), llvm::LLVMContext::MD_range, + llvm::LLVMContext::MD_alias_scope, llvm::LLVMContext::MD_noalias}; +} - MDNode *MDNode = llvm::MDNode::get(M.getContext(), Vals); +static void translateInstructionMetadata(Module &M) { + // construct allowlist of valid metadata node kinds + std::array<unsigned, 6> DXILCompatibleMDs = getCompatibleInstructionMDs(M); - 
BBTerminatorInst->setMetadata("dx.controlflow.hints", MDNode); - BBTerminatorInst->setMetadata("hlsl.controlflow.hint", nullptr); + for (Function &F : M) { + for (BasicBlock &BB : F) { + // This needs to be done first so that "hlsl.controlflow.hints" isn't + // removed in the whitelist below + if (auto *I = BB.getTerminator()) + translateBranchMetadata(M, I); + + for (auto &I : make_early_inc_range(BB)) { + I.dropUnknownNonDebugMetadata(DXILCompatibleMDs); + } } } } -static void translateMetadata(Module &M, DXILResourceMap &DRM, - DXILResourceTypeMap &DRTM, - const ModuleShaderFlags &ShaderFlags, - const ModuleMetadataInfo &MMDI) { +static void cleanModuleFlags(Module &M) { + NamedMDNode *MDFlags = M.getModuleFlagsMetadata(); + if (!MDFlags) + return; + + SmallVector<llvm::Module::ModuleFlagEntry> FlagEntries; + M.getModuleFlagsMetadata(FlagEntries); + bool Updated = false; + for (auto &Flag : FlagEntries) { + // llvm 3.7 only supports behavior up to AppendUnique. + if (Flag.Behavior <= Module::ModFlagBehavior::AppendUnique) + continue; + Flag.Behavior = Module::ModFlagBehavior::Warning; + Updated = true; + } + + if (!Updated) + return; + + MDFlags->eraseFromParent(); + + for (auto &Flag : FlagEntries) + M.addModuleFlag(Flag.Behavior, Flag.Key->getString(), Flag.Val); +} + +static void translateGlobalMetadata(Module &M, DXILResourceMap &DRM, + DXILResourceTypeMap &DRTM, + const ModuleShaderFlags &ShaderFlags, + const ModuleMetadataInfo &MMDI) { LLVMContext &Ctx = M.getContext(); IRBuilder<> IRB(Ctx); SmallVector<MDNode *> EntryFnMDNodes; @@ -381,6 +423,22 @@ static void translateMetadata(Module &M, DXILResourceMap &DRM, M.getOrInsertNamedMetadata("dx.entryPoints"); for (auto *Entry : EntryFnMDNodes) EntryPointsNamedMD->addOperand(Entry); + + cleanModuleFlags(M); + + // dx.rootsignatures will have been parsed from its metadata form as its + // binary form as part of the RootSignatureAnalysisWrapper, so safely + // remove it as it is not recognized in DXIL + if 
(NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures")) + RootSignature->eraseFromParent(); + + // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and + // causes all tests using the DXIL Validator to fail. + // + // This is a temporary fix and should be replaced with a allowlist once + // we have determined all metadata that the DXIL Validator allows + if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa")) + ErrNo->eraseFromParent(); } PreservedAnalyses DXILTranslateMetadata::run(Module &M, @@ -390,8 +448,8 @@ PreservedAnalyses DXILTranslateMetadata::run(Module &M, const ModuleShaderFlags &ShaderFlags = MAM.getResult<ShaderFlagsAnalysis>(M); const dxil::ModuleMetadataInfo MMDI = MAM.getResult<DXILMetadataAnalysis>(M); - translateMetadata(M, DRM, DRTM, ShaderFlags, MMDI); - translateBranchMetadata(M); + translateGlobalMetadata(M, DRM, DRTM, ShaderFlags, MMDI); + translateInstructionMetadata(M); return PreservedAnalyses::all(); } @@ -409,10 +467,13 @@ public: AU.addRequired<DXILResourceWrapperPass>(); AU.addRequired<ShaderFlagsAnalysisWrapper>(); AU.addRequired<DXILMetadataAnalysisWrapperPass>(); - AU.addPreserved<DXILResourceWrapperPass>(); + AU.addRequired<RootSignatureAnalysisWrapper>(); + AU.addPreserved<DXILMetadataAnalysisWrapperPass>(); - AU.addPreserved<ShaderFlagsAnalysisWrapper>(); AU.addPreserved<DXILResourceBindingWrapperPass>(); + AU.addPreserved<DXILResourceWrapperPass>(); + AU.addPreserved<RootSignatureAnalysisWrapper>(); + AU.addPreserved<ShaderFlagsAnalysisWrapper>(); } bool runOnModule(Module &M) override { @@ -425,8 +486,8 @@ public: dxil::ModuleMetadataInfo MMDI = getAnalysis<DXILMetadataAnalysisWrapperPass>().getModuleMetadata(); - translateMetadata(M, DRM, DRTM, ShaderFlags, MMDI); - translateBranchMetadata(M); + translateGlobalMetadata(M, DRM, DRTM, ShaderFlags, MMDI); + translateInstructionMetadata(M); return true; } }; @@ -443,6 +504,7 @@ INITIALIZE_PASS_BEGIN(DXILTranslateMetadataLegacy, 
"dxil-translate-metadata", "DXIL Translate Metadata", false, false) INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapperPass) INITIALIZE_PASS_DEPENDENCY(ShaderFlagsAnalysisWrapper) +INITIALIZE_PASS_DEPENDENCY(RootSignatureAnalysisWrapper) INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass) INITIALIZE_PASS_END(DXILTranslateMetadataLegacy, "dxil-translate-metadata", "DXIL Translate Metadata", false, false) diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.h b/llvm/lib/Target/DirectX/DXILTranslateMetadata.h index f3f5eb1..4c1ffac 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.h +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.h @@ -13,7 +13,8 @@ namespace llvm { -/// A pass that transforms DXIL Intrinsics that don't have DXIL opCodes +/// A pass that transforms LLVM Metadata in the module to it's DXIL equivalent, +/// then emits all recognized DXIL Metadata class DXILTranslateMetadata : public PassInfoMixin<DXILTranslateMetadata> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &); diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td index fb0928b8..ede8463 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.td +++ b/llvm/lib/Target/Hexagon/Hexagon.td @@ -79,6 +79,12 @@ def ExtensionHVXV79: SubtargetFeature<"hvxv79", "HexagonHVXVersion", ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71, ExtensionHVXV73, ExtensionHVXV75]>; +def ExtensionHVXV81: SubtargetFeature<"hvxv81", "HexagonHVXVersion", + "Hexagon::ArchEnum::V81", "Hexagon HVX instructions", + [ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67, + ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71, + ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79]>; + def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps", "true", "Hexagon HVX 64B instructions", [ExtensionHVX]>; def ExtensionHVX128B: SubtargetFeature<"hvx-length128b", "UseHVX128BOps", @@ -151,6 +157,8 @@ def UseHVXV75 : Predicate<"HST->useHVXV75Ops()">, 
AssemblerPredicate<(all_of ExtensionHVXV75)>; def UseHVXV79 : Predicate<"HST->useHVXV79Ops()">, AssemblerPredicate<(all_of ExtensionHVXV79)>; +def UseHVXV81 : Predicate<"HST->useHVXV81Ops()">, + AssemblerPredicate<(all_of ExtensionHVXV81)>; def UseAudio : Predicate<"HST->useAudioOps()">, AssemblerPredicate<(all_of ExtensionAudio)>; def UseZReg : Predicate<"HST->useZRegOps()">, @@ -488,6 +496,11 @@ def : Proc<"hexagonv79", HexagonModelV79, ArchV68, ArchV69, ArchV71, ArchV73, ArchV75, ArchV79, FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; +def : Proc<"hexagonv81", HexagonModelV81, + [ArchV65, ArchV66, ArchV67, ArchV68, ArchV69, ArchV71, ArchV73, + ArchV75, ArchV79, ArchV81, + FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; // Need to update the correct features for tiny core. // Disable NewValueJumps since the packetizer is unable to handle a packet with diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h index 8984534..9bf4034 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepArch.h +++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h @@ -29,7 +29,8 @@ enum class ArchEnum { V71, V73, V75, - V79 + V79, + V81 }; inline std::optional<Hexagon::ArchEnum> getCpu(StringRef CPU) { @@ -50,6 +51,7 @@ inline std::optional<Hexagon::ArchEnum> getCpu(StringRef CPU) { .Case("hexagonv73", Hexagon::ArchEnum::V73) .Case("hexagonv75", Hexagon::ArchEnum::V75) .Case("hexagonv79", Hexagon::ArchEnum::V79) + .Case("hexagonv81", Hexagon::ArchEnum::V81) .Default(std::nullopt); } } // namespace Hexagon diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.td b/llvm/lib/Target/Hexagon/HexagonDepArch.td index 8ec1d93..f623fd0 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepArch.td +++ b/llvm/lib/Target/Hexagon/HexagonDepArch.td @@ -34,3 +34,5 @@ def ArchV75: SubtargetFeature<"v75", "HexagonArchVersion", 
"Hexagon::ArchEnum::V def HasV75 : Predicate<"HST->hasV75Ops()">, AssemblerPredicate<(all_of ArchV75)>; def ArchV79: SubtargetFeature<"v79", "HexagonArchVersion", "Hexagon::ArchEnum::V79", "Enable Hexagon V79 architecture">; def HasV79 : Predicate<"HST->hasV79Ops()">, AssemblerPredicate<(all_of ArchV79)>; +def ArchV81: SubtargetFeature<"v81", "HexagonArchVersion", "Hexagon::ArchEnum::V81", "Enable Hexagon V81 architecture">; +def HasV81 : Predicate<"HST->hasV81Ops()">, AssemblerPredicate<(all_of ArchV81)>; diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td index 93696e0..f4e36fa7 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td @@ -7222,3 +7222,595 @@ class DepHVXItinV79 { [Hex_FWD, Hex_FWD, HVX_FWD]> ]; } + +class DepHVXItinV81 { + list<InstrItinData> DepHVXItinV81_list = [ + InstrItinData <tc_0390c1ca, /*SLOT01,LOAD,VA,VX_DV*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData 
<tc_0ec46cf9, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL]>], [], + []>, + + InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7], + [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2], + [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_227864f7, /*SLOT0,STORE,VA,VX_DV*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, 
Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_37820f4c, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_3904b926, /*SLOT01,LOAD*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7], + [HVX_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_ZW]>], [3, 2, 1, 
2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_3ce09744, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_4942646a, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_531b383c, /*SLOT0123*/ + 
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_540c3da3, /*SLOT0,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], + [Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_56e64202, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_649072c2, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData 
<tc_660769f1, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_7095ecba, /*SLOT01,LOAD,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_7177e272, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9], + [HVX_FWD]>, + + InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_72e2b393, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_73efe966, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_7417e785, /*SLOT0123,VS*/ + 
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL]>], [3, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_7d68d5c2, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_8772086c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/ + [InstrStage<1, [SLOT2], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_946013d8, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_9a1cab75, /*SLOT01,LOAD,VA,VX_DV*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, 
[CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9aff7a2a, /*SLOT0,STORE,VA,VX_DV*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_ZW]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a19b9305, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_a28f32b5, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_a69eeee1, /*SLOT01,LOAD,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + 
InstrStage<1, [CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ab23f776, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ac4046bc, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_b091f1c6, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bb599486, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c127de3a, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + 
InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_c4edf264, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_cda936da, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_dcca380f, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_ZW]>], [2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/ + 
[InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_e2fdd6e6, /*SLOT0123*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL]>], [3], + [HVX_FWD]>, + + InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_e699ae41, /*SLOT01,ZW*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_ZW]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_f175e046, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/ + [InstrStage<1, [SLOT2], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]> + ]; +}
\ No newline at end of file diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td index 7a1ad3e..48b665c 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td @@ -13740,3 +13740,891 @@ class DepScalarItinV79 { [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> ]; } + +class DepScalarItinV81 { + list<InstrItinData> DepScalarItinV81_list = [ + InstrItinData <tc_011e0e9d, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_01d44cb2, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_01e1be3b, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_02fe1c65, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0655b949, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_075c8dd8, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0a195f2c, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0a43be35, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_0a6c20ae, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0ba0d5da, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_0dfac0a7, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0fac1eb8, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_112d30d6, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2], + [Hex_FWD]>, + + InstrItinData 
<tc_1242dc2a, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_1248597c, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_139ef484, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_14ab4f41, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 3, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_151bf368, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_158aa3f7, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_197dce51, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1981450d, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3], + [Hex_FWD]>, + + InstrItinData <tc_1c2c7a4a, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1c7522a8, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1d41f8b7, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1fcb8495, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1fe4ab69, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_20131976, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_2237d952, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_23708a21, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], + []>, + + InstrItinData <tc_2471c1c8, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_24e109c7, 
/*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_24f426ab, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_27106296, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_280f7fe1, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_28e55c6f, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_2c13e7f5, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_2c3e17fc, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_2f573607, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_33e7e673, /*tc_2early*/ + [InstrStage<1, [SLOT2]>], [], + []>, + + InstrItinData <tc_362b0be2, /*tc_3*/ + [InstrStage<1, [SLOT2]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_38382228, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_388f9897, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_38e0bae9, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3d14a17b, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3edca78f, /*tc_2*/ + [InstrStage<1, [SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3fbf1042, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3], + [Hex_FWD]>, + + InstrItinData <tc_407e96f9, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_40d64c94, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 1], + [Hex_FWD, Hex_FWD]>, + + 
InstrItinData <tc_4222e6bf, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_42ff66ba, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_442395f3, /*tc_2latepred*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_449acf79, /*tc_latepredstaia*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_44d5a428, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_44fffc58, /*tc_3*/ + [InstrStage<1, [SLOT2, SLOT3]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_45791fb8, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_45f9d1be, /*tc_2early*/ + [InstrStage<1, [SLOT2]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_46c18ecf, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_49fdfd4b, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_4a55d03c, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_4abdbdc6, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_4ac61d92, /*tc_2latepred*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_4bf903b0, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3], + [Hex_FWD]>, + + InstrItinData <tc_503ce0f3, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_512b1653, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_53c851ab, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData 
<tc_54f0cee2, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_5502c366, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_55255f2b, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [], + []>, + + InstrItinData <tc_556f6577, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_55a9a350, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_55b33fda, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_56a124a7, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_57a55b54, /*tc_1*/ + [InstrStage<1, [SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5944960d, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_59a7822c, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5a222e89, /*tc_2early*/ + [InstrStage<1, [SLOT2]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5a4b5e58, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5b347363, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5ceb2f9e, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5da50c4b, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5deb5e47, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5e4cf0e8, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5f2afaf7, /*tc_latepredldaia*/ + [InstrStage<1, 
[SLOT0, SLOT1]>], [4, 4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_60e324ff, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_63567288, /*tc_2latepred*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4], + [Hex_FWD]>, + + InstrItinData <tc_64b00d8a, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_651cbe02, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_65279839, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_65cbd974, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_69bfb303, /*tc_3*/ + [InstrStage<1, [SLOT2, SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6aa823ab, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6ae3426b, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6d861a95, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6e20402a, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6f42bc60, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6fb52018, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6fc5dbea, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_711c805f, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_713b66bf, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7401744f, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + 
InstrItinData <tc_7476d766, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_74a42bda, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_759e57be, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_76bb5435, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7d6a2568, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_77f94a5e, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_788b1d09, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_78f87ed3, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_7af3a37e, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7b9187d3, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7c28bd7e, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3], + [Hex_FWD]>, + + InstrItinData <tc_7c31e19a, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7c6d32e4, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7dc63b5c, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7f58404a, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [], + []>, + + InstrItinData <tc_7f7f45f5, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7f8ae742, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8035e91f, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData 
<tc_822c3c68, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_829d8a86, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_838c4d7a, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_84a7500d, /*tc_2*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_86173609, /*tc_2latepred*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_887d1bb7, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8a6d0d94, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8a825db2, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8b5bd4f5, /*tc_2*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8e82e8ca, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8f36a2fd, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9124c04f, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_92240447, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_934753bb, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_937dd41c, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [], + []>, + + InstrItinData <tc_9406230a, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_95a33176, /*tc_2*/ + [InstrStage<1, 
[SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_95f43c5e, /*tc_3*/ + [InstrStage<1, [SLOT2]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_96ef76ef, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_975a4e54, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9783714b, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9b20a062, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9b34f5e0, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [], + []>, + + InstrItinData <tc_9b3c0462, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9bcfb2ee, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9c52f549, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9e27f2f9, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9e72dc89, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9edb7c77, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9edefe01, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9f6cd987, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a08b630b, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a1297125, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a154b476, 
/*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a2b365d2, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a3070909, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a32e03e7, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a38c45dc, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a4e22bbd, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a4ee89db, /*tc_2early*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_a724463d, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a7a13fac, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a7bdb22c, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a9edeffa, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_abfd9a6d, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ac65613f, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_addc37a8, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ae5babd7, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_aee6250c, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_af6af259, /*tc_ld*/ + [InstrStage<1, [SLOT0, 
SLOT1]>], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_b1ae5f67, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_b2196a3f, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_b3d46584, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_b4dc7630, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_b7c4062a, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_b837298f, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], + []>, + + InstrItinData <tc_b9bec29e, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [], + []>, + + InstrItinData <tc_ba9255a6, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bb07f2c5, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bb78483e, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bb831a7c, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bf2ffc0f, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c20701f0, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c21d7447, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c57d9f39, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c818ff7f, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_ce59038e, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3], + [Hex_FWD, 
Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_cfa0e29b, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d03278fd, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d234b61a, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_d33e5eee, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d3632d88, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d45ba9cd, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_d57d649c, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_d61dfdc3, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d68dca5c, /*tc_3stall*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d71ea8fa, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d7718fbe, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_db596beb, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_db96aa6b, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_dc51281d, /*tc_3*/ + [InstrStage<1, [SLOT2]>], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_decdde8a, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_df5d53f9, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_e3d699e3, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_e60def48, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_e9170fb7, /*tc_ld*/ + 
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ed03645c, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ed3f8d2a, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_eed07714, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_eeda4109, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ef921005, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f098b237, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f0cdeccf, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f0e8e832, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f34c1c21, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f38f92e1, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_f529831b, /*tc_latepredstaia*/ + [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f6e2aff9, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f7569068, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f97707c1, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_f999c66e, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_fae9dfa5, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_fedb7e19, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 
2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> + ]; +}
\ No newline at end of file diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td index ae96753..f8f1c2a 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -39178,6 +39178,19 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vsub_hf_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vsub($Vu32.hf,$Vv32.qf16)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011010000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vsub_qf16 : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -39269,6 +39282,19 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vsub_sf_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vsub($Vu32.sf,$Vv32.qf32)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011010000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vsub_sf_sf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -41116,6 +41142,17 @@ let hasNewValue = 1; let opNewValue = 0; let isSolo = 1; } +def Y2_tlbpp : HInst< +(outs IntRegs:$Rd32), +(ins DoubleRegs:$Rss32), +"$Rd32 = tlbp($Rss32)", +tc_6aa823ab, TypeCR>, Enc_90cd8b, Requires<[HasV81]> { +let Inst{13-5} = 0b000000000; +let Inst{31-21} = 0b01101100011; +let hasNewValue = 1; +let opNewValue = 0; +let isSolo = 1; +} def Y2_tlbr : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td index 17cb96c..23f4b3a 100644 --- 
a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td +++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td @@ -3827,3 +3827,14 @@ def: Pat<(int_hexagon_V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2), (V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV79, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsub_hf_f8_128B HvxVR:$src1, HvxVR:$src2), (V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV79, UseHVX128B]>; + +// V81 HVX Instructions. + +def: Pat<(int_hexagon_V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_hf_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_sf_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index e285e04..7ee280d 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -654,7 +654,9 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { IntNo == Intrinsic::hexagon_V6_vgathermh || IntNo == Intrinsic::hexagon_V6_vgathermh_128B || IntNo == Intrinsic::hexagon_V6_vgathermhw || - IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) { + IntNo == Intrinsic::hexagon_V6_vgathermhw_128B || + IntNo == Intrinsic::hexagon_V6_vgather_vscattermh || + IntNo == Intrinsic::hexagon_V6_vgather_vscattermh_128B) { SelectV65Gather(N); return; } diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index c7a4f68..3cc146b 100644 --- 
a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -2953,6 +2953,10 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { case Intrinsic::hexagon_V6_vgathermhw_128B: Opcode = Hexagon::V6_vgathermhw_pseudo; break; + case Intrinsic::hexagon_V6_vgather_vscattermh: + case Intrinsic::hexagon_V6_vgather_vscattermh_128B: + Opcode = Hexagon::V6_vgather_vscatter_mh_pseudo; + break; } SDVTList VTs = CurDAG->getVTList(MVT::Other); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 9f7f434..526b4de 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -2145,7 +2145,9 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::hexagon_V6_vgathermhq: case Intrinsic::hexagon_V6_vgathermhq_128B: case Intrinsic::hexagon_V6_vgathermhwq: - case Intrinsic::hexagon_V6_vgathermhwq_128B: { + case Intrinsic::hexagon_V6_vgathermhwq_128B: + case Intrinsic::hexagon_V6_vgather_vscattermh: + case Intrinsic::hexagon_V6_vgather_vscattermh_128B: { const Module &M = *I.getParent()->getParent()->getParent(); Info.opc = ISD::INTRINSIC_W_CHAIN; Type *VecTy = I.getArgOperand(1)->getType(); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 939841a..47726d6 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1554,80 +1554,93 @@ HexagonInstrInfo::expandVGatherPseudo(MachineInstr &MI) const { MachineBasicBlock::iterator First; switch (Opc) { - case Hexagon::V6_vgathermh_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); 
- return First.getInstrIterator(); - - case Hexagon::V6_vgathermw_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); - - case Hexagon::V6_vgathermhw_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); - - case Hexagon::V6_vgathermhq_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)) - .add(MI.getOperand(5)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); - - case Hexagon::V6_vgathermwq_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)) - .add(MI.getOperand(5)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); - - case Hexagon::V6_vgathermhwq_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)) - .add(MI.getOperand(5)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); + case 
Hexagon::V6_vgather_vscatter_mh_pseudo: + // This is mainly a place holder. It will be extended. + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vscattermh)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + case Hexagon::V6_vgathermh_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermw_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermhw_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermhq_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)) + .add(MI.getOperand(5)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermwq_pseudo: + First = BuildMI(MBB, 
MI, DL, get(Hexagon::V6_vgathermwq)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)) + .add(MI.getOperand(5)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermhwq_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)) + .add(MI.getOperand(5)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); } return MI.getIterator(); @@ -2806,6 +2819,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::V6_vL32b_nt_tmp_npred_ai: case Hexagon::V6_vS32Ub_npred_ai: case Hexagon::V6_vgathermh_pseudo: + case Hexagon::V6_vgather_vscatter_mh_pseudo: case Hexagon::V6_vgathermw_pseudo: case Hexagon::V6_vgathermhw_pseudo: case Hexagon::V6_vgathermhq_pseudo: diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td index f927f9b..42393d0 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td @@ -40,6 +40,19 @@ defm V6_vgathermh_pseudo : vgathermh<HvxVR>; defm V6_vgathermw_pseudo : vgathermw<HvxVR>; defm V6_vgathermhw_pseudo : vgathermhw<HvxWR>; + +multiclass vgather_scatter_mh<RegisterClass RC> { + let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, + mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in + def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), + (ins IntRegs:$_dst_, s4_0Imm:$Ii, + IntRegs:$Rt, ModRegs:$Mu, RC:$Vv), + ".error \"should not emit\" ", + []>; +} + +defm V6_vgather_vscatter_mh_pseudo : vgather_scatter_mh<HvxVR>; + multiclass vgathermhq<RegisterClass RC1, RegisterClass RC2> { let 
isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in diff --git a/llvm/lib/Target/Hexagon/HexagonSchedule.td b/llvm/lib/Target/Hexagon/HexagonSchedule.td index b8a9cf3..9bcd4bf 100644 --- a/llvm/lib/Target/Hexagon/HexagonSchedule.td +++ b/llvm/lib/Target/Hexagon/HexagonSchedule.td @@ -75,3 +75,4 @@ include "HexagonScheduleV71T.td" include "HexagonScheduleV73.td" include "HexagonScheduleV75.td" include "HexagonScheduleV79.td" +include "HexagonScheduleV81.td"
\ No newline at end of file diff --git a/llvm/lib/Target/Hexagon/HexagonScheduleV81.td b/llvm/lib/Target/Hexagon/HexagonScheduleV81.td new file mode 100644 index 0000000..dd5f5a0 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonScheduleV81.td @@ -0,0 +1,31 @@ +//=-HexagonScheduleV81.td - HexagonV81 Scheduling Definitions *- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +def HexagonV81ItinList : DepScalarItinV81, ScalarItin, + DepHVXItinV81, HVXItin, PseudoItin { + list<InstrItinData> ItinList = + !listconcat(DepScalarItinV81_list, ScalarItin_list, + DepHVXItinV81_list, HVXItin_list, PseudoItin_list); +} + +def HexagonItinerariesV81 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM, CVI_ZW], + [Hex_FWD, HVX_FWD], + HexagonV81ItinList.ItinList>; + +def HexagonModelV81 : SchedMachineModel { + // Max issue per cycle == bundle width. 
+ let IssueWidth = 4; + let Itineraries = HexagonItinerariesV81; + let LoadLatency = 1; + let CompleteModel = 0; +} diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index 7430567..995f66d 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -224,6 +224,15 @@ public: bool useHVXV79Ops() const { return HexagonHVXVersion >= Hexagon::ArchEnum::V79; } + bool hasV81Ops() const { + return getHexagonArchVersion() >= Hexagon::ArchEnum::V81; + } + bool hasV81OpsOnly() const { + return getHexagonArchVersion() == Hexagon::ArchEnum::V81; + } + bool useHVXV81Ops() const { + return HexagonHVXVersion >= Hexagon::ArchEnum::V81; + } bool useAudioOps() const { return UseAudioOps; } bool useCompound() const { return UseCompound; } diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index 171e294..e925e04 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -31,6 +31,10 @@ using namespace llvm; static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false), cl::Hidden, cl::desc("Enable loop vectorizer for HVX")); +cl::opt<bool> HexagonAllowScatterGatherHVX( + "hexagon-allow-scatter-gather-hvx", cl::init(false), cl::Hidden, + cl::desc("Allow auto-generation of HVX scatter-gather")); + static cl::opt<bool> EnableV68FloatAutoHVX( "force-hvx-float", cl::Hidden, cl::desc("Enable auto-vectorization of floatint point types on v68.")); @@ -354,6 +358,61 @@ bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/, return HexagonMaskedVMem && ST.isTypeForHVX(DataType); } +bool HexagonTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) const { + // For now assume we can not deal with all HVX datatypes. 
+ if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) || + !HexagonAllowScatterGatherHVX) + return false; + // This must be in sync with HexagonVectorCombine pass. + switch (Ty->getScalarSizeInBits()) { + case 8: + return (getTypeNumElements(Ty) == 128); + case 16: + if (getTypeNumElements(Ty) == 64 || getTypeNumElements(Ty) == 32) + return (Alignment >= 2); + break; + case 32: + if (getTypeNumElements(Ty) == 32) + return (Alignment >= 4); + break; + default: + break; + } + return false; +} + +bool HexagonTTIImpl::isLegalMaskedScatter(Type *Ty, Align Alignment) const { + if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) || + !HexagonAllowScatterGatherHVX) + return false; + // This must be in sync with HexagonVectorCombine pass. + switch (Ty->getScalarSizeInBits()) { + case 8: + return (getTypeNumElements(Ty) == 128); + case 16: + if (getTypeNumElements(Ty) == 64) + return (Alignment >= 2); + break; + case 32: + if (getTypeNumElements(Ty) == 32) + return (Alignment >= 4); + break; + default: + break; + } + return false; +} + +bool HexagonTTIImpl::forceScalarizeMaskedGather(VectorType *VTy, + Align Alignment) const { + return !isLegalMaskedGather(VTy, Alignment); +} + +bool HexagonTTIImpl::forceScalarizeMaskedScatter(VectorType *VTy, + Align Alignment) const { + return !isLegalMaskedScatter(VTy, Alignment); +} + /// --- Vector TTI end --- unsigned HexagonTTIImpl::getPrefetchDistance() const { diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index dbf16c9..cec2bf9 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -169,6 +169,12 @@ public: unsigned AddressSpace) const override; bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace) const override; + bool isLegalMaskedGather(Type *Ty, Align Alignment) const override; + bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override; + bool 
forceScalarizeMaskedGather(VectorType *VTy, + Align Alignment) const override; + bool forceScalarizeMaskedScatter(VectorType *VTy, + Align Alignment) const override; /// @} diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 9ab5202..5c50ec2 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -57,6 +57,11 @@ #define DEBUG_TYPE "hexagon-vc" +// This is a const that represents default HVX VTCM page size. +// It is boot time configurable, so we probably want an API to +// read it, but for now assume 128KB +#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072 + using namespace llvm; namespace { @@ -418,6 +423,18 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) { class HvxIdioms { public: + enum DstQualifier { + Undefined = 0, + Arithmetic, + LdSt, + LLVM_Gather, + LLVM_Scatter, + HEX_Gather_Scatter, + HEX_Gather, + HEX_Scatter, + Call + }; + HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) { auto *Int32Ty = HVC.getIntTy(32); HvxI32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/false); @@ -473,6 +490,11 @@ private: auto createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX, Signedness SgnX, ArrayRef<Value *> WordY, Signedness SgnY) const -> SmallVector<Value *>; + // Vector manipulations for Ripple + bool matchScatter(Instruction &In) const; + bool matchGather(Instruction &In) const; + Value *processVScatter(Instruction &In) const; + Value *processVGather(Instruction &In) const; VectorType *HvxI32Ty; VectorType *HvxP32Ty; @@ -1545,7 +1567,7 @@ auto AlignVectors::isSectorTy(Type *Ty) const -> bool { } auto AlignVectors::run() -> bool { - LLVM_DEBUG(dbgs() << "Running HVC::AlignVectors on " << HVC.F.getName() + LLVM_DEBUG(dbgs() << "\nRunning HVC::AlignVectors on " << HVC.F.getName() << '\n'); if (!createAddressGroups()) return false; @@ -1797,6 +1819,846 @@ auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp 
&Op) const return Ext; }
+// Returns true iff \p In is a call to the llvm.masked.scatter intrinsic.
+inline bool HvxIdioms::matchScatter(Instruction &In) const {
+  const auto *Call = dyn_cast<IntrinsicInst>(&In);
+  return Call && Call->getIntrinsicID() == Intrinsic::masked_scatter;
+}
+
+// Returns true iff \p In is a call to the llvm.masked.gather intrinsic.
+inline bool HvxIdioms::matchGather(Instruction &In) const {
+  const auto *Call = dyn_cast<IntrinsicInst>(&In);
+  return Call && Call->getIntrinsicID() == Intrinsic::masked_gather;
+}
+
+Instruction *locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual);
+
+// Binary instructions we want to handle as users of gather/scatter.
+// \p Opc is an instruction opcode (Instruction::Add, ...); every opcode that
+// is not listed below yields false.
+inline bool isArithmetic(unsigned Opc) {
+  switch (Opc) {
+  // Integer arithmetic.
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+  case Instruction::UDiv:
+  // Bitwise logic.
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  // Shifts.
+  case Instruction::AShr:
+  case Instruction::LShr:
+  case Instruction::Shl:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// TODO: Maybe use MemoryLocation for this. See getLocOrNone above.
+// Returns the pointer associated with \p Ptr, or nullptr if none is found:
+//  - allocas, function arguments and globals are returned unchanged;
+//  - loads and stores yield their pointer operand;
+//  - llvm.masked.store yields its operand 1.
+inline Value *getPointer(Value *Ptr) {
+  assert(Ptr && "Unable to extract pointer");
+  if (isa<AllocaInst, Argument, GlobalValue>(Ptr))
+    return Ptr;
+  if (isa<LoadInst, StoreInst>(Ptr))
+    return getLoadStorePointerOperand(Ptr);
+  auto *II = dyn_cast<IntrinsicInst>(Ptr);
+  if (II && II->getIntrinsicID() == Intrinsic::masked_store)
+    return II->getOperand(1);
+  return nullptr;
+}
+
+// Classifies \p In as a possible destination (user) of a gather/scatter.
+// On success returns the destination instruction and records its kind in
+// \p Qual. Casts and GEPs are looked through by searching their own users
+// with locateDestination(). Returns nullptr (leaving \p Qual untouched by
+// this function itself) when \p In is null or is not a recognized
+// destination.
+static Instruction *selectDestination(Instruction *In,
+                                      HvxIdioms::DstQualifier &Qual) {
+  if (!In)
+    return nullptr;
+
+  if (isa<StoreInst>(In)) {
+    Qual = HvxIdioms::LdSt;
+    return In;
+  }
+
+  // Intrinsic calls must be examined before the generic CallInst case below.
+  if (auto *II = dyn_cast<IntrinsicInst>(In)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::masked_gather:
+      Qual = HvxIdioms::LLVM_Gather;
+      return In;
+    case Intrinsic::masked_scatter:
+      Qual = HvxIdioms::LLVM_Scatter;
+      return In;
+    case Intrinsic::masked_store:
+      Qual = HvxIdioms::LdSt;
+      return In;
+    case Intrinsic::hexagon_V6_vgather_vscattermh:
+      Qual = HvxIdioms::HEX_Gather_Scatter;
+      return In;
+    case Intrinsic::hexagon_V6_vscattermh_128B:
+      Qual = HvxIdioms::HEX_Scatter;
+      return In;
+    case Intrinsic::hexagon_V6_vgathermh_128B:
+      Qual = HvxIdioms::HEX_Gather;
+      return In;
+    default:
+      // Any other intrinsic is not a destination.
+      return nullptr;
+    }
+  }
+
+  // Casts (including zext) and GEPs only forward the value: keep searching
+  // among their users.
+  if (isa<CastInst, GetElementPtrInst>(In))
+    return locateDestination(In, Qual);
+
+  if (isa<CallInst>(In)) {
+    Qual = HvxIdioms::Call;
+    return In;
+  }
+
+  if (isArithmetic(In->getOpcode())) {
+    Qual = HvxIdioms::Arithmetic;
+    return In;
+  }
+
+  LLVM_DEBUG(dbgs() << "Unhandled destination : " << *In << "\n");
+  return nullptr;
+}
+
+// 
This method attempts to find destination (user) for a given intrinsic.
+// Given that these are produced only by Ripple, the number of options is
+// limited. Simplest case is explicit store which in fact is redundant (since
+// HVX gather creates its own store during packetization). Nevertheless we need
+// to figure out the address where we are storing. Other cases are more
+// complicated, but still few.
+//
+// Every instruction user of \p In is classified with selectDestination().
+// The first accepted user for which getPointer() yields a pointer is
+// returned, and \p Qual is set to the qualifier that was recorded for that
+// very user. If no accepted user has a pointer, the result of the last
+// classification is returned (it may be nullptr) and \p Qual is left as the
+// classification sequence set it.
+Instruction *locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual) {
+  if (!In)
+    return nullptr;
+
+  Instruction *Destination = nullptr;
+  // Accepted destinations, each paired with the qualifier computed for it.
+  // Qual is overwritten while later users are classified, so the qualifier
+  // must be remembered per candidate to keep it in sync with the instruction
+  // that is eventually returned.
+  SmallVector<std::pair<Instruction *, HvxIdioms::DstQualifier>> Candidates;
+  for (auto &U : In->uses()) {
+    auto *UI = dyn_cast<Instruction>(U.getUser());
+    if (!UI)
+      continue;
+    Destination = selectDestination(UI, Qual);
+    if (Destination)
+      Candidates.emplace_back(Destination, Qual);
+  }
+
+  // Now see which of the users (if any) is a memory destination.
+  for (const auto &[Dst, DstQual] : Candidates) {
+    if (getPointer(Dst)) {
+      Qual = DstQual;
+      return Dst;
+    }
+  }
+  return Destination;
+}
+
+// The two intrinsics we handle here have GEP in a different position:
+// operand 0 for llvm.masked.gather and operand 1 for llvm.masked.scatter.
+// Returns that operand if it is a GEP instruction, nullptr otherwise.
+// \p In must be one of these two intrinsics (checked by assertion only).
+inline GetElementPtrInst *locateGepFromIntrinsic(Instruction *In) {
+  assert(In && "Bad instruction");
+  IntrinsicInst *IIn = dyn_cast<IntrinsicInst>(In);
+  assert((IIn && (IIn->getIntrinsicID() == Intrinsic::masked_gather ||
+                  IIn->getIntrinsicID() == Intrinsic::masked_scatter)) &&
+         "Not a gather Intrinsic");
+  const unsigned PtrOperandIdx =
+      IIn->getIntrinsicID() == Intrinsic::masked_gather ? 0 : 1;
+  return dyn_cast<GetElementPtrInst>(IIn->getOperand(PtrOperandIdx));
+}
+
+// Given the intrinsic find its GEP argument and extract base address it uses.
+// The method relies on the way how Ripple typically forms the GEP for
+// scatter/gather.
+static Value *locateAddressFromIntrinsic(Instruction *In) {
+  GetElementPtrInst *GEP = locateGepFromIntrinsic(In);
+  if (!GEP) {
+    LLVM_DEBUG(dbgs() << " No GEP in intrinsic\n");
+    return nullptr;
+  }
+
+  Value *Base = GEP->getPointerOperand();
+  // The base is loaded directly.
+  if (isa<LoadInst>(Base))
+    return Base;
+
+  // The base is a zero-extension of a load or of another masked gather.
+  if (auto *ZExt = dyn_cast<ZExtInst>(Base)) {
+    Value *Narrow = ZExt->getOperand(0);
+    if (isa<LoadInst>(Narrow))
+      return Narrow;
+    auto *II = dyn_cast<IntrinsicInst>(Narrow);
+    if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
+      return locateAddressFromIntrinsic(II);
+  }
+
+  // The base is a shuffle of a load, or of an insertelement whose inserted
+  // scalar is a load, an alloca, a function argument or a global.
+  if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(Base)) {
+    Value *Vec = Shuffle->getOperand(0);
+    if (isa<LoadInst>(Vec))
+      return Vec;
+    if (auto *Insert = dyn_cast<InsertElementInst>(Vec)) {
+      Value *Scalar = Insert->getOperand(1);
+      if (isa<LoadInst, AllocaInst, Argument, GlobalValue>(Scalar))
+        return Scalar;
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");
+  return nullptr;
+}
+
+// Returns the type describing \p In: the accessed type for loads and stores,
+// the result type of llvm.masked.load, the type of operand 0 of
+// llvm.masked.store, and the value's own type otherwise. Returns nullptr for
+// a null input.
+static Type *getIndexType(Value *In) {
+  if (!In)
+    return nullptr;
+
+  if (isa<LoadInst, StoreInst>(In))
+    return getLoadStoreType(In);
+
+  if (auto *II = dyn_cast<IntrinsicInst>(In)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::masked_load:
+      return II->getType();
+    case Intrinsic::masked_store:
+      return II->getOperand(0)->getType();
+    default:
+      break;
+    }
+  }
+  return In->getType();
+}
+
+// Walks from a GEP index operand back to the value that provides the
+// indexes. Loads, llvm.masked.load, llvm.masked.gather and constant data
+// vectors are returned as they are; zext, sext and shufflevector are looked
+// through via operand 0, insertelement via operand 1; for a GEP its operand 0
+// is returned. Returns nullptr for a null input or any other kind of value.
+static Value *locateIndexesFromGEP(Value *In) {
+  if (!In)
+    return nullptr;
+
+  // Terminal cases: the value itself is what we are looking for.
+  if (isa<LoadInst, ConstantDataVector>(In))
+    return In;
+  if (auto *II = dyn_cast<IntrinsicInst>(In)) {
+    Intrinsic::ID ID = II->getIntrinsicID();
+    if (ID == Intrinsic::masked_load || ID == Intrinsic::masked_gather)
+      return In;
+  }
+
+  // Look-through cases.
+  if (isa<ZExtInst, SExtInst, ShuffleVectorInst>(In))
+    return locateIndexesFromGEP(cast<Instruction>(In)->getOperand(0));
+  if (auto *Insert = dyn_cast<InsertElementInst>(In))
+    return locateIndexesFromGEP(Insert->getOperand(1));
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(In))
+    return GEP->getOperand(0);
+  return nullptr;
+}
+
+// Given the intrinsic find its GEP argument and extract offsets from the base
+// address it uses.
+static Value *locateIndexesFromIntrinsic(Instruction *In) {
+  GetElementPtrInst *GEP = locateGepFromIntrinsic(In);
+  if (!GEP) {
+    LLVM_DEBUG(dbgs() << " No GEP in intrinsic\n");
+    return nullptr;
+  }
+
+  // Operand 1 of the GEP is its first index operand.
+  Value *Found = locateIndexesFromGEP(GEP->getOperand(1));
+  if (!Found)
+    LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");
+  return Found;
+}
+
+// Because of the awkward definition of many Hex intrinsics we often have to
+// reinterpret HVX native <64 x i16> as <32 x i32>, which in practice is a NOP
+// for all use cases, so this only exists to make the IR builder happy.
+inline Value *getReinterpretiveCast_i16_to_i32(const HexagonVectorCombine &HVC,
+                                               IRBuilderBase &Builder,
+                                               LLVMContext &Ctx, Value *I) {
+  // Emits an identity shuffle over 64 lanes followed by a bitcast to the
+  // 32-bit-element HVX vector type. Only the type of the value changes.
+  assert(I && "Unable to reinterprete cast");
+  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+  constexpr unsigned NumElts = 64;
+  std::vector<unsigned> ShuffleMask;
+  ShuffleMask.reserve(NumElts);
+  for (unsigned Idx = 0; Idx < NumElts; ++Idx)
+    ShuffleMask.push_back(Idx);
+  Constant *Mask = llvm::ConstantDataVector::get(Ctx, ShuffleMask);
+  Value *CastShuffle =
+      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
+  return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32");
+}
+
+// Recast <128 x i8> as <32 x i32>
+// Emits an identity shuffle over 128 lanes followed by a bitcast to the
+// 32-bit-element HVX vector type.
+inline Value *getReinterpretiveCast_i8_to_i32(const HexagonVectorCombine &HVC,
+                                              IRBuilderBase &Builder,
+                                              LLVMContext &Ctx, Value *I) {
+  assert(I && "Unable to reinterprete cast");
+  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+  constexpr unsigned NumElts = 128;
+  std::vector<unsigned> ShuffleMask;
+  ShuffleMask.reserve(NumElts);
+  for (unsigned Idx = 0; Idx < NumElts; ++Idx)
+    ShuffleMask.push_back(Idx);
+  Constant *Mask = llvm::ConstantDataVector::get(Ctx, ShuffleMask);
+  Value *CastShuffle =
+      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
+  return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32");
+}
+
+// Create <32 x i32> mask reinterpreted as <128 x i1> with a given pattern
+// The same 32-bit \p pattern is replicated into all 32 words and converted to
+// a predicate with V6_vandvrt.
+inline Value *get_i32_Mask(const HexagonVectorCombine &HVC,
+                           IRBuilderBase &Builder, LLVMContext &Ctx,
+                           unsigned int pattern) {
+  const std::vector<unsigned int> ByteMask(32, pattern);
+
+  return Builder.CreateIntrinsic(
+      HVC.getBoolTy(128), HVC.HST.getIntrinsicId(Hexagon::V6_vandvrt),
+      {llvm::ConstantDataVector::get(Ctx, ByteMask), HVC.getConstInt(~0)},
+      nullptr);
+}
+
+/// Rewrite the scatter \p In into HVX vscatter intrinsic call(s).
+///
+/// Operand 0 of \p In is the vector of values to scatter. The destination
+/// base pointer and the index vector are recovered with
+/// locateAddressFromIntrinsic / getPointer / locateIndexesFromIntrinsic.
+/// Byte elements are handled as two masked halfword scatters (Hi/Lo),
+/// halfword and word elements map onto vscattermh / vscattermw.
+///
+/// \returns the last emitted scatter intrinsic call, or nullptr if the
+/// pattern is not handled. No IR is emitted before all up-front checks pass.
+Value *HvxIdioms::processVScatter(Instruction &In) const {
+  // Bail out gracefully (also in release builds) instead of dereferencing a
+  // failed dyn_cast.
+  auto *InpTy = dyn_cast<VectorType>(In.getOperand(0)->getType());
+  if (!InpTy) {
+    LLVM_DEBUG(
+        dbgs() << "Cannot handle no vector type for llvm.scatter/gather\n");
+    return nullptr;
+  }
+  auto *ElemTy = dyn_cast<IntegerType>(InpTy->getElementType());
+  if (!ElemTy) {
+    LLVM_DEBUG(dbgs() << "llvm.scatter needs integer type argument\n");
+    return nullptr;
+  }
+  unsigned InpSize = HVC.getSizeOf(InpTy);
+  LLVMContext &Ctx = In.getContext();
+  unsigned ElemWidth = HVC.DL.getTypeAllocSize(ElemTy);
+  LLVM_DEBUG({
+    unsigned Elements = HVC.length(InpTy);
+    dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
+    dbgs() << " Input type(" << *InpTy << ") elements(" << Elements
+           << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
+           << ElemWidth << ")\n";
+  });
+
+  IRBuilder Builder(In.getParent(), In.getIterator(),
+                    InstSimplifyFolder(HVC.DL));
+
+  auto *ValueToScatter = In.getOperand(0);
+  LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n");
+
+  if (HVC.HST.getVectorLength() != InpSize) {
+    LLVM_DEBUG(dbgs() << "Unhandled vector size(" << InpSize
+                      << ") for vscatter\n");
+    return nullptr;
+  }
+
+  // Reject unsupported element widths before emitting any IR so that a
+  // failed match leaves the function untouched.
+  if (ElemWidth != 1 && ElemWidth != 2 && ElemWidth != 4) {
+    LLVM_DEBUG(dbgs() << "Unhandled element type for vscatter\n");
+    return nullptr;
+  }
+
+  // Base address of indexes.
+  auto *IndexLoad = locateAddressFromIntrinsic(&In);
+  if (!IndexLoad)
+    return nullptr;
+  LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");
+
+  // Address of destination. Must be in VTCM.
+  auto *Ptr = getPointer(IndexLoad);
+  if (!Ptr)
+    return nullptr;
+  LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n");
+  // Indexes/offsets
+  auto *Indexes = locateIndexesFromIntrinsic(&In);
+  if (!Indexes)
+    return nullptr;
+  LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");
+  // The intrinsics take the base address as i32 rather than as a pointer.
+  Value *CastedDst = Builder.CreateBitOrPointerCast(Ptr, Type::getInt32Ty(Ctx),
+                                                    "cst_ptr_to_i32");
+  LLVM_DEBUG(dbgs() << " CastedDst : " << *CastedDst << "\n");
+  // Adjust Indexes
+  auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
+  Value *CastIndex = nullptr;
+  if (cstDataVector) {
+    // Our indexes are represented as a constant. We need it in a reg.
+    // Round-trip the constant through a stack slot to materialize it.
+    AllocaInst *IndexesAlloca =
+        Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false));
+    [[maybe_unused]] auto *StoreIndexes =
+        Builder.CreateStore(cstDataVector, IndexesAlloca);
+    LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
+    CastIndex = Builder.CreateLoad(IndexesAlloca->getAllocatedType(),
+                                   IndexesAlloca, "reload_index");
+  } else if (ElemWidth == 2) {
+    CastIndex = getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes);
+  } else {
+    CastIndex = Indexes;
+  }
+  LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");
+
+  if (ElemWidth == 1) {
+    // v128i8 There is no native instruction for this.
+    // Do this as two Hi/Lo scatters with masking.
+    Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+    // Extend indexes. We assume that indexes are in 128i8 format - need to
+    // expand them to Hi/Lo 64i16
+    Value *CastIndexes = Builder.CreateBitCast(CastIndex, NT, "cast_to_32i32");
+    auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
+    auto *UnpackedIndexes = Builder.CreateIntrinsic(
+        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes, nullptr);
+    LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n");
+
+    auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
+    auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
+    Value *IndexHi =
+        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
+    Value *IndexLo =
+        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
+    LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");
+    LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");
+    // Now unpack values to scatter
+    Value *CastSrc =
+        getReinterpretiveCast_i8_to_i32(HVC, Builder, Ctx, ValueToScatter);
+    LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
+    auto *UnpackedValueToScatter = Builder.CreateIntrinsic(
+        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr);
+    LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter
+                      << ")\n");
+
+    Value *UVSHi =
+        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
+    Value *UVSLo =
+        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);
+    LLVM_DEBUG(dbgs() << " UVSHi : " << *UVSHi << ")\n");
+    LLVM_DEBUG(dbgs() << " UVSLo : " << *UVSLo << ")\n");
+
+    // Create the mask for individual bytes
+    auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
+    LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");
+    [[maybe_unused]] auto *ResHi = Builder.CreateIntrinsic(
+        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
+        {QByteMask, CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+         IndexHi, UVSHi},
+        nullptr);
+    LLVM_DEBUG(dbgs() << " ResHi : " << *ResHi << ")\n");
+    return Builder.CreateIntrinsic(
+        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
+        {QByteMask, CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+         IndexLo, UVSLo},
+        nullptr);
+  }
+
+  if (ElemWidth == 2) {
+    Value *CastSrc =
+        getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, ValueToScatter);
+    LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
+    return Builder.CreateIntrinsic(
+        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
+        {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex,
+         CastSrc},
+        nullptr);
+  }
+
+  // Only word elements remain; other widths were rejected above.
+  assert(ElemWidth == 4 && "Unexpected element width for vscatter");
+  return Builder.CreateIntrinsic(
+      Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
+      {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex,
+       ValueToScatter},
+      nullptr);
+}
+
+/// Rewrite the gather \p In into HVX vgather intrinsic call(s).
+///
+/// The lowering depends on how the gathered value is consumed, as reported
+/// by locateDestination through its DstQualifier out-parameter:
+///  - LdSt / Arithmetic: gather directly into the consumer's memory and erase
+///    the consumer (\c Dst).
+///  - LLVM_Scatter: gather straight into the scatter's destination.
+///  - HEX_Gather_Scatter / HEX_Scatter / HEX_Gather: gather into a temporary
+///    alloca (or the previously inserted pseudo) and reload the result.
+///  - LLVM_Gather: not implemented.
+///
+/// \returns the emitted gather intrinsic call, or nullptr if nothing was
+/// rewritten.
+Value *HvxIdioms::processVGather(Instruction &In) const {
+  // Bail out gracefully (also in release builds) instead of dereferencing a
+  // failed dyn_cast.
+  auto *InpTy = dyn_cast<VectorType>(In.getOperand(0)->getType());
+  if (!InpTy) {
+    LLVM_DEBUG(dbgs() << "Cannot handle no vector type for llvm.gather\n");
+    return nullptr;
+  }
+  auto *ElemTy = dyn_cast<PointerType>(InpTy->getElementType());
+  if (!ElemTy) {
+    LLVM_DEBUG(dbgs() << "llvm.gather needs vector of ptr argument\n");
+    return nullptr;
+  }
+  LLVMContext &Ctx = In.getContext();
+  LLVM_DEBUG(dbgs() << "\n[Process gather](" << In << ")\n"
+                    << *In.getParent() << "\n");
+  LLVM_DEBUG(dbgs() << " Input type(" << *InpTy << ") elements("
+                    << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
+                    << ") type(" << *ElemTy << ") Access alignment("
+                    << *In.getOperand(1) << ") AddressSpace("
+                    << ElemTy->getAddressSpace() << ")\n");
+
+  // TODO: Handle masking of elements.
+  assert(isa<VectorType>(In.getOperand(2)->getType()) &&
+         "llvm.gather needs vector for mask");
+  IRBuilder Builder(In.getParent(), In.getIterator(),
+                    InstSimplifyFolder(HVC.DL));
+
+  // See who is using the result. The difference between LLVM and HVX vgather
+  // Intrinsic makes it impossible to handle all cases with temp storage. Alloca
+  // in VTCM is not yet supported, so for now we just bail out for those cases.
+  HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;
+  Instruction *Dst = locateDestination(&In, Qual);
+  if (!Dst) {
+    LLVM_DEBUG(dbgs() << " Unable to locate vgather destination\n");
+    return nullptr;
+  }
+  LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual
+                    << ")\n");
+
+  // Address of destination. Must be in VTCM.
+  auto *Ptr = getPointer(Dst);
+  if (!Ptr) {
+    LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");
+    return nullptr;
+  }
+
+  // Result type. Only vector results are handled.
+  auto *DstType = dyn_cast_or_null<VectorType>(getIndexType(Dst));
+  if (!DstType) {
+    LLVM_DEBUG(
+        dbgs() << "Cannot handle non vector dst type for llvm.gather\n");
+    return nullptr;
+  }
+
+  // Base address for sources to be loaded
+  auto *IndexLoad = locateAddressFromIntrinsic(&In);
+  if (!IndexLoad)
+    return nullptr;
+  LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");
+
+  // Gather indexes/offsets
+  auto *Indexes = locateIndexesFromIntrinsic(&In);
+  if (!Indexes)
+    return nullptr;
+  LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");
+
+  Instruction *Gather = nullptr;
+  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+  if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
+    // We fully assume the address space is in VTCM. We also assume that all
+    // pointers in Operand(0) have the same base(!).
+    // This is the most basic case of all the above.
+    unsigned OutputSize = HVC.getSizeOf(DstType);
+    auto *DstElemTy = dyn_cast<IntegerType>(DstType->getElementType());
+    if (!DstElemTy) {
+      LLVM_DEBUG(dbgs() << " Unhandled element type for vgather\n");
+      return nullptr;
+    }
+    unsigned ElemWidth = HVC.DL.getTypeAllocSize(DstElemTy);
+    LLVM_DEBUG(dbgs() << " Buffer type : " << *Ptr->getType()
+                      << " Address space ("
+                      << Ptr->getType()->getPointerAddressSpace() << ")\n"
+                      << " Result type : " << *DstType
+                      << "\n Size in bytes : " << OutputSize
+                      << " element type(" << *DstElemTy
+                      << ")\n ElemWidth : " << ElemWidth << " bytes\n");
+
+    auto *IndexType = dyn_cast_or_null<VectorType>(getIndexType(Indexes));
+    if (!IndexType) {
+      LLVM_DEBUG(
+          dbgs() << "Cannot handle non vector index type for llvm.gather\n");
+      return nullptr;
+    }
+    unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());
+    LLVM_DEBUG(dbgs() << " IndexWidth(" << IndexWidth << ")\n");
+
+    // Validate everything before emitting IR or erasing Dst. Previously an
+    // output size that was neither 1x, 0.5x nor 2x the register width fell
+    // through to Dst->eraseFromParent() without any gather being created.
+    if (HVC.HST.getVectorLength() != OutputSize) {
+      if (HVC.HST.getVectorLength() == OutputSize * 2) {
+        // This is half of the reg width, duplicate low in high
+        LLVM_DEBUG(dbgs() << " Unhandled half of register size\n");
+      } else if (HVC.HST.getVectorLength() * 2 == OutputSize) {
+        LLVM_DEBUG(dbgs() << " Unhandle twice the register size\n");
+      } else {
+        LLVM_DEBUG(dbgs() << " Unhandled output size for vgather\n");
+      }
+      return nullptr;
+    }
+    if (ElemWidth != 1 && ElemWidth != 2 && ElemWidth != 4) {
+      LLVM_DEBUG(dbgs() << " Unhandled element type for vgather\n");
+      return nullptr;
+    }
+    // Halfword and word gathers require indexes of the same width.
+    if (ElemWidth != 1 && IndexWidth != ElemWidth) {
+      LLVM_DEBUG(dbgs() << " Unhandled index type for vgather\n");
+      return nullptr;
+    }
+
+    // Intrinsic takes i32 instead of pointer so cast.
+    Value *CastedPtr = Builder.CreateBitOrPointerCast(
+        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+    // [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, ...]
+    // int_hexagon_V6_vgathermh [... , llvm_v16i32_ty]
+    // int_hexagon_V6_vgathermh_128B [... , llvm_v32i32_ty]
+    // int_hexagon_V6_vgathermhw [... , llvm_v32i32_ty]
+    // int_hexagon_V6_vgathermhw_128B [... , llvm_v64i32_ty]
+    // int_hexagon_V6_vgathermw [... , llvm_v16i32_ty]
+    // int_hexagon_V6_vgathermw_128B [... , llvm_v32i32_ty]
+    if (ElemWidth == 1) {
+      // v128i8 There is no native instruction for this.
+      // Do this as two Hi/Lo gathers with masking.
+      // Unpack indexes. We assume that indexes are in 128i8 format - need to
+      // expand them to Hi/Lo 64i16
+      Value *CastIndexes = Builder.CreateBitCast(Indexes, NT, "cast_to_32i32");
+      auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
+      auto *UnpackedIndexes =
+          Builder.CreateIntrinsic(HVC.getHvxTy(HVC.getIntTy(32), true),
+                                  V6_vunpack, CastIndexes, nullptr);
+      LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes
+                        << ")\n");
+
+      auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
+      auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
+      Value *IndexHi =
+          HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
+      Value *IndexLo =
+          HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
+      LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");
+      LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");
+      // Create the mask for individual bytes
+      auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
+      LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");
+      // We use our destination allocation as a temp storage
+      // This is unlikely to work properly for masked gather.
+      auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);
+      [[maybe_unused]] auto GatherHi = Builder.CreateIntrinsic(
+          Type::getVoidTy(Ctx), V6_vgather,
+          {Ptr, QByteMask, CastedPtr,
+           HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), IndexHi},
+          nullptr);
+      LLVM_DEBUG(dbgs() << " GatherHi : " << *GatherHi << ")\n");
+      // Rematerialize the result
+      Value *LoadedResultHi = Builder.CreateLoad(
+          HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi");
+      LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");
+      // Same for the low part. Here we use Gather to return non-NULL result
+      // from this function and continue to iterate. We also are deleting Dst
+      // store below.
+      Gather = Builder.CreateIntrinsic(
+          Type::getVoidTy(Ctx), V6_vgather,
+          {Ptr, QByteMask, CastedPtr,
+           HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), IndexLo},
+          nullptr);
+      LLVM_DEBUG(dbgs() << " GatherLo : " << *Gather << ")\n");
+      Value *LoadedResultLo = Builder.CreateLoad(
+          HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo");
+      LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");
+      // Now we have properly sized bytes in every other position
+      // B b A a c a A b B c f F g G h H is presented as
+      // B . b . A . a . c . a . A . b . B . c . f . F . g . G . h . H
+      // Use vpack to gather them
+      auto V6_vpackeb = HVC.HST.getIntrinsicId(Hexagon::V6_vpackeb);
+      auto Res = Builder.CreateIntrinsic(
+          NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);
+      LLVM_DEBUG(dbgs() << " ScaledRes : " << *Res << "\n");
+      [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr);
+      LLVM_DEBUG(dbgs() << " StoreRes : " << *StoreRes << "\n");
+    } else if (ElemWidth == 2) {
+      // v32i16 with halfword indexes (IndexWidth == 2 was checked above).
+      // Reinterpret 64i16 as 32i32. Only needed for syntactic IR match.
+      Value *CastIndex =
+          getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes);
+      LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");
+      // shift all i16 left by 1 to match short addressing mode instead of
+      // byte.
+      auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
+      Value *AdjustedIndex = HVC.createHvxIntrinsic(
+          Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
+      LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex
+                        << ")\n");
+
+      auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermh);
+      // The 3rd argument is the size of the region to gather from. Probably
+      // want to set it to max VTCM size.
+      Gather = Builder.CreateIntrinsic(
+          Type::getVoidTy(Ctx), V6_vgather,
+          {Ptr, CastedPtr, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+           AdjustedIndex},
+          nullptr);
+      // Use dumps are diagnostics only; keep them out of non-debug runs.
+      LLVM_DEBUG({
+        for (auto &U : Dst->uses()) {
+          if (auto *UI = dyn_cast<Instruction>(U.getUser()))
+            dbgs() << " dst used by: " << *UI << "\n";
+        }
+        for (auto &U : In.uses()) {
+          if (auto *UI = dyn_cast<Instruction>(U.getUser()))
+            dbgs() << " In used by : " << *UI << "\n";
+        }
+      });
+      // Create temp load from result in case the result is used by any
+      // other instruction.
+      Value *LoadedResult = Builder.CreateLoad(
+          HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result");
+      LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+      In.replaceAllUsesWith(LoadedResult);
+    } else {
+      // v32i32 with word indexes (ElemWidth == 4, IndexWidth == 4).
+      // NOTE(review): the word indexes are scaled with the halfword shift
+      // V6_vaslh, which shifts each 16-bit lane independently. This looks
+      // like it should be a word shift; confirm against the HVX ISA before
+      // relying on indexes that do not fit in 14 bits.
+      auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
+      Value *AdjustedIndex = HVC.createHvxIntrinsic(
+          Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});
+      LLVM_DEBUG(dbgs() << " Shifted word index: " << *AdjustedIndex
+                        << ")\n");
+      Gather = Builder.CreateIntrinsic(
+          Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,
+          {Ptr, CastedPtr, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+           AdjustedIndex},
+          nullptr);
+    }
+    // Erase the original intrinsic and store that consumes it.
+    // HVX will create a pseudo for gather that is expanded to gather + store
+    // during packetization.
+    Dst->eraseFromParent();
+  } else if (Qual == HvxIdioms::LLVM_Scatter) {
+    // Gather feeds directly into scatter.
+    LLVM_DEBUG({
+      auto *DstInpTy = cast<VectorType>(Dst->getOperand(1)->getType());
+      unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
+      unsigned DstElements = HVC.length(DstInpTy);
+      auto *DstElemTy = cast<PointerType>(DstInpTy->getElementType());
+      dbgs() << " Gather feeds into scatter\n Values to scatter : "
+             << *Dst->getOperand(0) << "\n";
+      dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements
+             << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy
+             << ") Access alignment(" << *Dst->getOperand(2) << ")\n";
+    });
+    // Address of source
+    auto *Src = getPointer(IndexLoad);
+    if (!Src)
+      return nullptr;
+    LLVM_DEBUG(dbgs() << " Src : " << *Src << "\n");
+
+    if (!isa<PointerType>(Src->getType())) {
+      LLVM_DEBUG(dbgs() << " Source is not a pointer type...\n");
+      return nullptr;
+    }
+
+    // Locate the scatter destination before emitting IR so that a failed
+    // match does not leave dead casts behind.
+    auto *DstLoad = locateAddressFromIntrinsic(Dst);
+    if (!DstLoad) {
+      LLVM_DEBUG(dbgs() << " Unable to locate DstLoad\n");
+      return nullptr;
+    }
+    LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");
+
+    Value *ScatterPtr = getPointer(DstLoad);
+    if (!ScatterPtr)
+      return nullptr;
+    LLVM_DEBUG(dbgs() << " Ptr : " << *ScatterPtr << "\n");
+
+    Value *CastedSrc = Builder.CreateBitOrPointerCast(
+        Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+    LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");
+
+    // NOTE(review): every other path reinterprets `Indexes` here; this one
+    // passes `IndexLoad`. Kept as in the original - confirm it is intended.
+    Value *CastIndex =
+        getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, IndexLoad);
+    LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");
+    // Shift all i16 left by 1 to match short addressing mode instead of
+    // byte.
+    auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
+    Value *AdjustedIndex = HVC.createHvxIntrinsic(
+        Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
+    LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n");
+
+    return Builder.CreateIntrinsic(
+        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+        {ScatterPtr, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+         AdjustedIndex},
+        nullptr);
+  } else if (Qual == HvxIdioms::HEX_Gather_Scatter) {
+    // Gather feeds into previously inserted pseudo intrinsic.
+    // These could not be in the same packet, so we need to generate another
+    // pseudo that is expanded to .tmp + store V6_vgathermh_pseudo
+    // V6_vgathermh_pseudo (ins IntRegs:$_dst_, s4_0Imm:$Ii, IntRegs:$Rt,
+    // ModRegs:$Mu, HvxVR:$Vv)
+    if (isa<AllocaInst>(IndexLoad)) {
+      auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
+      if (cstDataVector) {
+        // Our indexes are represented as a constant. We need THEM in a reg.
+        // This most likely will not work properly since alloca gives us DDR
+        // stack location. This will be fixed once we teach compiler about VTCM.
+        AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);
+        [[maybe_unused]] auto *StoreIndexes =
+            Builder.CreateStore(cstDataVector, IndexesAlloca);
+        LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
+        Value *LoadedIndex = Builder.CreateLoad(
+            IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");
+        AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
+        LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");
+
+        Value *CastedSrc = Builder.CreateBitOrPointerCast(
+            IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+        LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
+
+        Gather = Builder.CreateIntrinsic(
+            Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+            {ResultAlloca, CastedSrc,
+             HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), LoadedIndex},
+            nullptr);
+        Value *LoadedResult = Builder.CreateLoad(
+            HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
+        LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+        LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");
+        In.replaceAllUsesWith(LoadedResult);
+      }
+    } else {
+      // Address of source
+      auto *Src = getPointer(IndexLoad);
+      if (!Src)
+        return nullptr;
+      LLVM_DEBUG(dbgs() << " Src : " << *Src << "\n");
+
+      // Locate the destination before emitting IR so that a failed match
+      // does not leave dead casts behind.
+      auto *DstLoad = locateAddressFromIntrinsic(Dst);
+      if (!DstLoad)
+        return nullptr;
+      LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");
+      auto *PseudoPtr = getPointer(DstLoad);
+      if (!PseudoPtr)
+        return nullptr;
+      LLVM_DEBUG(dbgs() << " Ptr : " << *PseudoPtr << "\n");
+
+      Value *CastedSrc = Builder.CreateBitOrPointerCast(
+          Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+      LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");
+
+      Gather = Builder.CreateIntrinsic(
+          Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,
+          {PseudoPtr, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+           Indexes},
+          nullptr);
+    }
+    return Gather;
+  } else if (Qual == HvxIdioms::HEX_Scatter) {
+    // This is the case when result of a gather is used as an argument to
+    // Intrinsic::hexagon_V6_vscattermh_128B. Most likely we just inserted it
+    // ourselves. We have to create alloca, store to it, and replace all uses
+    // with that.
+    AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
+    Value *CastedSrc = Builder.CreateBitOrPointerCast(
+        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+    LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
+    Value *CastIndex =
+        getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes);
+    LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");
+
+    Gather = Builder.CreateIntrinsic(
+        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+        {ResultAlloca, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+         CastIndex},
+        nullptr);
+    Value *LoadedResult = Builder.CreateLoad(
+        HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
+    LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+    In.replaceAllUsesWith(LoadedResult);
+  } else if (Qual == HvxIdioms::HEX_Gather) {
+    // Gather feeds to another gather but already replaced with
+    // hexagon_V6_vgathermh_128B
+    if (isa<AllocaInst>(IndexLoad)) {
+      auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
+      if (cstDataVector) {
+        // Our indexes are represented as a constant. We need it in a reg.
+        AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);
+
+        [[maybe_unused]] auto *StoreIndexes =
+            Builder.CreateStore(cstDataVector, IndexesAlloca);
+        LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
+        Value *LoadedIndex = Builder.CreateLoad(
+            IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");
+        AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
+        LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca
+                          << "\n AddressSpace: "
+                          << ResultAlloca->getAddressSpace() << "\n";);
+
+        Value *CastedSrc = Builder.CreateBitOrPointerCast(
+            IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+        LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
+
+        Gather = Builder.CreateIntrinsic(
+            Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+            {ResultAlloca, CastedSrc,
+             HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), LoadedIndex},
+            nullptr);
+        Value *LoadedResult = Builder.CreateLoad(
+            HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
+        LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+        LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");
+        In.replaceAllUsesWith(LoadedResult);
+      }
+    }
+  } else if (Qual == HvxIdioms::LLVM_Gather) {
+    // Gather feeds into another gather. Not an error for the user, so report
+    // it through the debug stream only.
+    LLVM_DEBUG(dbgs() << " Underimplemented vgather to vgather sequence\n");
+    return nullptr;
+  } else
+    llvm_unreachable("Unhandled Qual enum");
+
+  return Gather;
+}
+
 auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In, const FxpOp &Op) const -> Value * { assert(Op.X.Val->getType() == Op.Y.Val->getType()); @@ -2138,6 +3000,26 @@ auto HvxIdioms::run() -> bool { It = StartOver ? B.rbegin() : cast<Instruction>(New)->getReverseIterator(); Changed = true; + } else if (matchGather(*It)) { + Value *New = processVGather(*It); + if (!New) + continue; + LLVM_DEBUG(dbgs() << " Gather : " << *New << "\n"); + // We replace original intrinsic with a new pseudo call.
+ It->eraseFromParent(); + It = cast<Instruction>(New)->getReverseIterator(); + RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI); + Changed = true; + } else if (matchScatter(*It)) { + Value *New = processVScatter(*It); + if (!New) + continue; + LLVM_DEBUG(dbgs() << " Scatter : " << *New << "\n"); + // We replace original intrinsic with a new pseudo call. + It->eraseFromParent(); + It = cast<Instruction>(New)->getReverseIterator(); + RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI); + Changed = true; } } } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 6455757..2f59b7c 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -186,6 +186,9 @@ static unsigned featureToArchVersion(unsigned Feature) { case Hexagon::ArchV79: case Hexagon::ExtensionHVXV79: return 79; + case Hexagon::ArchV81: + case Hexagon::ExtensionHVXV81: + return 81; } llvm_unreachable("Expected valid arch feature"); return 0; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 6b48a21..b8075bd 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -96,6 +96,8 @@ cl::opt<bool> MV75("mv75", cl::Hidden, cl::desc("Build for Hexagon V75"), cl::init(false)); cl::opt<bool> MV79("mv79", cl::Hidden, cl::desc("Build for Hexagon V79"), cl::init(false)); +cl::opt<bool> MV81("mv81", cl::Hidden, cl::desc("Build for Hexagon V81"), + cl::init(false)); } // namespace static cl::opt<Hexagon::ArchEnum> EnableHVX( @@ -111,6 +113,7 @@ static cl::opt<Hexagon::ArchEnum> EnableHVX( clEnumValN(Hexagon::ArchEnum::V73, "v73", "Build for HVX v73"), clEnumValN(Hexagon::ArchEnum::V75, "v75", "Build for HVX v75"), clEnumValN(Hexagon::ArchEnum::V79, 
"v79", "Build for HVX v79"), + clEnumValN(Hexagon::ArchEnum::V81, "v81", "Build for HVX v81"), // Sentinel for no value specified. clEnumValN(Hexagon::ArchEnum::Generic, "", "")), // Sentinel for flag not present. @@ -159,6 +162,8 @@ static StringRef HexagonGetArchVariant() { return "hexagonv75"; if (MV79) return "hexagonv79"; + if (MV81) + return "hexagonv81"; return ""; } @@ -474,6 +479,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) { case Hexagon::ArchEnum::V79: Result.push_back("+hvxv79"); break; + case Hexagon::ArchEnum::V81: + Result.push_back("+hvxv81"); + break; case Hexagon::ArchEnum::Generic: { Result.push_back(StringSwitch<StringRef>(CPU) @@ -489,7 +497,8 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) { .Case("hexagonv71t", "+hvxv71") .Case("hexagonv73", "+hvxv73") .Case("hexagonv75", "+hvxv75") - .Case("hexagonv79", "+hvxv79")); + .Case("hexagonv79", "+hvxv79") + .Case("hexagonv81", "+hvxv81")); break; } case Hexagon::ArchEnum::NoArch: @@ -538,8 +547,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { FeatureBitset FB = S; unsigned CpuArch = ArchV5; for (unsigned F : - {ArchV79, ArchV75, ArchV73, ArchV71, ArchV69, ArchV68, ArchV67, ArchV66, - ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) { + {ArchV81, ArchV79, ArchV75, ArchV73, ArchV71, ArchV69, ArchV68, ArchV67, + ArchV66, ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) { if (!FB.test(F)) continue; CpuArch = F; @@ -556,7 +565,7 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71, - ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79}) { + ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79, ExtensionHVXV81}) { if (!FB.test(F)) continue; HasHvxVer = true; @@ -569,6 +578,9 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { // HasHvxVer is false, and UseHvx is 
true. switch (CpuArch) { + case ArchV81: + FB.set(ExtensionHVXV81); + [[fallthrough]]; case ArchV79: FB.set(ExtensionHVXV79); [[fallthrough]]; @@ -668,12 +680,12 @@ void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS) { std::optional<unsigned> Hexagon_MC::getHVXVersion(const FeatureBitset &Features) { - for (auto Arch : {Hexagon::ExtensionHVXV79, Hexagon::ExtensionHVXV75, - Hexagon::ExtensionHVXV73, Hexagon::ExtensionHVXV71, - Hexagon::ExtensionHVXV69, Hexagon::ExtensionHVXV68, - Hexagon::ExtensionHVXV67, Hexagon::ExtensionHVXV66, - Hexagon::ExtensionHVXV65, Hexagon::ExtensionHVXV62, - Hexagon::ExtensionHVXV60}) + for (auto Arch : {Hexagon::ExtensionHVXV81, Hexagon::ExtensionHVXV79, + Hexagon::ExtensionHVXV75, Hexagon::ExtensionHVXV73, + Hexagon::ExtensionHVXV71, Hexagon::ExtensionHVXV69, + Hexagon::ExtensionHVXV68, Hexagon::ExtensionHVXV67, + Hexagon::ExtensionHVXV66, Hexagon::ExtensionHVXV65, + Hexagon::ExtensionHVXV62, Hexagon::ExtensionHVXV60}) if (Features.test(Arch)) return Arch; return {}; @@ -681,13 +693,13 @@ Hexagon_MC::getHVXVersion(const FeatureBitset &Features) { unsigned Hexagon_MC::getArchVersion(const FeatureBitset &Features) { for (auto Arch : - {Hexagon::ArchV79, Hexagon::ArchV75, Hexagon::ArchV73, Hexagon::ArchV71, - Hexagon::ArchV69, Hexagon::ArchV68, Hexagon::ArchV67, Hexagon::ArchV66, - Hexagon::ArchV65, Hexagon::ArchV62, Hexagon::ArchV60, Hexagon::ArchV55, - Hexagon::ArchV5}) + {Hexagon::ArchV81, Hexagon::ArchV79, Hexagon::ArchV75, Hexagon::ArchV73, + Hexagon::ArchV71, Hexagon::ArchV69, Hexagon::ArchV68, Hexagon::ArchV67, + Hexagon::ArchV66, Hexagon::ArchV65, Hexagon::ArchV62, Hexagon::ArchV60, + Hexagon::ArchV55, Hexagon::ArchV5}) if (Features.test(Arch)) return Arch; - llvm_unreachable("Expected arch v5-v79"); + llvm_unreachable("Expected arch v5-v81"); return 0; } @@ -708,7 +720,8 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) { .Case("hexagonv71t", llvm::ELF::EF_HEXAGON_MACH_V71T) 
.Case("hexagonv73", llvm::ELF::EF_HEXAGON_MACH_V73) .Case("hexagonv75", llvm::ELF::EF_HEXAGON_MACH_V75) - .Case("hexagonv79", llvm::ELF::EF_HEXAGON_MACH_V79); + .Case("hexagonv79", llvm::ELF::EF_HEXAGON_MACH_V79) + .Case("hexagonv81", llvm::ELF::EF_HEXAGON_MACH_V81); } llvm::ArrayRef<MCPhysReg> Hexagon_MC::GetVectRegRev() { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index f7deeaf..ca4a655 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2614,6 +2614,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget))) return Result; + // Try to widen vectors to gain more optimization opportunities. + if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG)) + return NewShuffle; if ((Result = lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget))) return Result; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 22cf3a7..598735f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -4675,7 +4675,7 @@ class WMMA_INSTR<string _Intr, list<dag> _Args> // class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride> - : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record, + : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record_name, [!con((ins ADDR:$src), !if(WithStride, (ins B32:$ldm), (ins)))]>, Requires<Frag.Predicates> { @@ -4714,7 +4714,7 @@ class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride> // class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride> - : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record, + : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record_name, 
[!con((ins ADDR:$dst), Frag.Ins, !if(WithStride, (ins B32:$ldm), (ins)))]>, @@ -4778,7 +4778,7 @@ class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> { class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB, WMMA_REGINFO FragC, WMMA_REGINFO FragD, string ALayout, string BLayout, int Satfinite, string rnd, string b1op> - : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record, + : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record_name, [FragA.Ins, FragB.Ins, FragC.Ins]>, // Requires does not seem to have effect on Instruction w/o Patterns. // We set it here anyways and propagate to the Pat<> we construct below. @@ -4837,7 +4837,7 @@ defset list<WMMA_INSTR> WMMAs = { class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB, WMMA_REGINFO FragC, WMMA_REGINFO FragD, string ALayout, string BLayout, int Satfinite, string b1op, string Kind> - : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, Kind, FragA, FragB, FragC, FragD>.record, + : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, Kind, FragA, FragB, FragC, FragD>.record_name, [FragA.Ins, FragB.Ins, FragC.Ins]>, // Requires does not seem to have effect on Instruction w/o Patterns. // We set it here anyways and propagate to the Pat<> we construct below. @@ -4891,7 +4891,7 @@ class MMA_SP<WMMA_REGINFO FragA, WMMA_REGINFO FragB, WMMA_REGINFO FragC, WMMA_REGINFO FragD, string Metadata, string Kind, int Satfinite> : WMMA_INSTR<MMA_SP_NAME<Metadata, Kind, Satfinite, - FragA, FragB, FragC, FragD>.record, + FragA, FragB, FragC, FragD>.record_name, [FragA.Ins, FragB.Ins, FragC.Ins, (ins B32:$metadata, i32imm:$selector)]>, // Requires does not seem to have effect on Instruction w/o Patterns. 
@@ -4946,7 +4946,7 @@ defset list<WMMA_INSTR> MMA_SPs = { // ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16 // class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space> - : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins ADDR:$src)]>, + : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record_name, [(ins ADDR:$src)]>, Requires<Frag.Predicates> { // Build PatFrag that only matches particular address space. PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src), @@ -4981,7 +4981,7 @@ defset list<WMMA_INSTR> LDMATRIXs = { // stmatrix.sync.aligned.m8n8[|.trans][|.shared].b16 // class STMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space> - : WMMA_INSTR<STMATRIX_NAME<Frag, Transposed>.record, [!con((ins ADDR:$dst), Frag.Ins)]>, + : WMMA_INSTR<STMATRIX_NAME<Frag, Transposed>.record_name, [!con((ins ADDR:$dst), Frag.Ins)]>, Requires<Frag.Predicates> { // Build PatFrag that only matches particular address space. dag PFOperands = !con((ops node:$dst), @@ -5376,7 +5376,7 @@ class Tcgen05MMAInst<bit Sp, string KindStr, string ASpace, Requires<PTXPredicates> { Intrinsic Intrin = !cast<Intrinsic>( - NVVM_TCGEN05_MMA<Sp, ASpace, AShift, ScaleInputD>.record + NVVM_TCGEN05_MMA<Sp, ASpace, AShift, ScaleInputD>.record_name ); dag ScaleInpIns = !if(!eq(ScaleInputD, 1), (ins i64imm:$scale_input_d), (ins)); @@ -5618,7 +5618,7 @@ class Tcgen05MMABlockScaleInst<bit Sp, string ASpace, string KindStr, Requires<[hasTcgen05Instructions, PTXPredicate]> { Intrinsic Intrin = !cast<Intrinsic>( - NVVM_TCGEN05_MMA_BLOCKSCALE<Sp, ASpace, KindStr, ScaleVecSize>.record); + NVVM_TCGEN05_MMA_BLOCKSCALE<Sp, ASpace, KindStr, ScaleVecSize>.record_name); dag SparseMetadataIns = !if(!eq(Sp, 1), (ins B32:$spmetadata), (ins)); dag SparseMetadataIntr = !if(!eq(Sp, 1), (Intrin i32:$spmetadata), (Intrin)); @@ -5702,7 +5702,7 @@ class Tcgen05MMAWSInst<bit Sp, string ASpace, string KindStr, Requires<[hasTcgen05Instructions]> { Intrinsic Intrin = !cast<Intrinsic>( - NVVM_TCGEN05_MMA_WS<Sp, 
ASpace, HasZeroColMask>.record); + NVVM_TCGEN05_MMA_WS<Sp, ASpace, HasZeroColMask>.record_name); dag ZeroColMaskIns = !if(!eq(HasZeroColMask, 1), (ins B64:$zero_col_mask), (ins)); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index aca7abd..44d1a44 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -4578,6 +4578,8 @@ def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>; def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>; def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>; +def : InstAlias<"mtpidr $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsISA3_0]>; +def : InstAlias<"mfpidr $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsISA3_0]>; foreach SPRG = 4-7 in { def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>, diff --git a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp index 67b510d..f2b216b 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/FormatVariadic.h" #define GET_GICOMBINER_DEPS #include "RISCVGenPostLegalizeGICombiner.inc" @@ -42,6 +43,56 @@ namespace { #include "RISCVGenPostLegalizeGICombiner.inc" #undef GET_GICOMBINER_TYPES +/// Match: G_STORE (G_FCONSTANT +0.0), addr +/// Return the source vreg in MatchInfo if matched. 
+bool matchFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI, + const RISCVSubtarget &STI, Register &MatchInfo) { + if (MI.getOpcode() != TargetOpcode::G_STORE) + return false; + + Register SrcReg = MI.getOperand(0).getReg(); + if (!SrcReg.isVirtual()) + return false; + + MachineInstr *Def = MRI.getVRegDef(SrcReg); + if (!Def || Def->getOpcode() != TargetOpcode::G_FCONSTANT) + return false; + + auto *CFP = Def->getOperand(1).getFPImm(); + if (!CFP || !CFP->getValueAPF().isPosZero()) + return false; + + unsigned ValBits = MRI.getType(SrcReg).getSizeInBits(); + if ((ValBits == 16 && !STI.hasStdExtZfh()) || + (ValBits == 32 && !STI.hasStdExtF()) || + (ValBits == 64 && (!STI.hasStdExtD() || !STI.is64Bit()))) + return false; + + MatchInfo = SrcReg; + return true; +} + +/// Apply: rewrite to G_STORE (G_CONSTANT 0 [XLEN]), addr +void applyFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, const RISCVSubtarget &STI, + Register &MatchInfo) { + const unsigned XLen = STI.getXLen(); + + auto Zero = B.buildConstant(LLT::scalar(XLen), 0); + MI.getOperand(0).setReg(Zero.getReg(0)); + + MachineInstr *Def = MRI.getVRegDef(MatchInfo); + if (Def && MRI.use_nodbg_empty(MatchInfo)) + Def->eraseFromParent(); + +#ifndef NDEBUG + unsigned ValBits = MRI.getType(MatchInfo).getSizeInBits(); + LLVM_DEBUG(dbgs() << formatv("[{0}] Fold FP zero store -> int zero " + "(XLEN={1}, ValBits={2}):\n {3}\n", + DEBUG_TYPE, XLen, ValBits, MI)); +#endif +} + class RISCVPostLegalizerCombinerImpl : public Combiner { protected: const CombinerHelper Helper; diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td index 995dd0c..a06b60d 100644 --- a/llvm/lib/Target/RISCV/RISCVCombine.td +++ b/llvm/lib/Target/RISCV/RISCVCombine.td @@ -19,11 +19,20 @@ def RISCVO0PreLegalizerCombiner: GICombiner< "RISCVO0PreLegalizerCombinerImpl", [optnone_combines]> { } +// Rule: fold store (fp +0.0) -> store (int zero [XLEN]) +def fp_zero_store_matchdata 
: GIDefMatchData<"Register">; +def fold_fp_zero_store : GICombineRule< + (defs root:$root, fp_zero_store_matchdata:$matchinfo), + (match (G_STORE $src, $addr):$root, + [{ return matchFoldFPZeroStore(*${root}, MRI, STI, ${matchinfo}); }]), + (apply [{ applyFoldFPZeroStore(*${root}, MRI, B, STI, ${matchinfo}); }])>; + // Post-legalization combines which are primarily optimizations. // TODO: Add more combines. def RISCVPostLegalizerCombiner : GICombiner<"RISCVPostLegalizerCombinerImpl", [sub_to_add, combines_for_extload, redundant_and, identity_combines, shift_immed_chain, - commute_constant_to_rhs, simplify_neg_minmax]> { + commute_constant_to_rhs, simplify_neg_minmax, + fold_fp_zero_store]> { } diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 9e6b7f0..2754d78 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1124,7 +1124,8 @@ def HasStdExtZbkbOrP "'Base P' (Packed-SIMD)">; def HasStdExtZbbOrZbkbOrP - : Predicate<"Subtarget->HasStdExtZbbOrZbkb()|| Subtarget->hasStdExtP()">, + : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb() || " + "Subtarget->hasStdExtP()">, AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbkb, FeatureStdExtP), "'Zbb' (Basic Bit-Manipulation) or " "'Zbkb' (Bitmanip instructions for Cryptography) or " diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 26fe9ed..1c930ac 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -318,8 +318,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); - if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() && - !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() && + if (!Subtarget.hasStdExtZbb() && !Subtarget.hasStdExtP() && + !Subtarget.hasVendorXTHeadBb() && 
!Subtarget.hasVendorXqcibm() && + !Subtarget.hasVendorXAndesPerf() && !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand); @@ -392,7 +393,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); } - if (Subtarget.hasStdExtZbb() || + if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP() || (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) { setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT, Legal); @@ -403,6 +404,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom); } else { setOperationAction(ISD::CTTZ, XLenVT, Expand); + // If have a CLZW, but not CTZW, custom promote i32. + if (Subtarget.hasStdExtP() && Subtarget.is64Bit()) + setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom); } if (!Subtarget.hasCPOPLike()) { @@ -419,13 +423,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // We need the custom lowering to make sure that the resulting sequence // for the 32bit case is efficient on 64bit targets. // Use default promotion for i32 without Zbb. - if (Subtarget.is64Bit() && Subtarget.hasStdExtZbb()) + if (Subtarget.is64Bit() && + (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP())) setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom); } else { setOperationAction(ISD::CTLZ, XLenVT, Expand); } - if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) { + if (Subtarget.hasStdExtP() || + (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) { setOperationAction(ISD::ABS, XLenVT, Legal); } else if (Subtarget.hasShortForwardBranchOpt()) { // We can use PseudoCCSUB to implement ABS. 
@@ -14669,6 +14675,25 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); bool IsCTZ = N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF; + + // Without Zbb, lower as 32 - clzw(~X & (X-1)) + if (IsCTZ && !Subtarget.hasStdExtZbb()) { + assert(Subtarget.hasStdExtP()); + + NewOp0 = DAG.getFreeze(NewOp0); + SDValue Not = DAG.getNOT(DL, NewOp0, MVT::i64); + SDValue Minus1 = DAG.getNode(ISD::SUB, DL, MVT::i64, NewOp0, + DAG.getConstant(1, DL, MVT::i64)); + SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Not, Minus1); + SDValue CLZW = DAG.getNode(RISCVISD::CLZW, DL, MVT::i64, And); + SDValue Sub = DAG.getNode(ISD::SUB, DL, MVT::i64, + DAG.getConstant(32, DL, MVT::i64), CLZW); + SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Sub, + DAG.getValueType(MVT::i32)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); + return; + } + unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW; SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); @@ -14797,7 +14822,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits. SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); - SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src); + SDValue Abs = DAG.getNode(RISCVISD::NEGW_MAX, DL, MVT::i64, Src); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs)); return; } @@ -21813,7 +21838,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( // Output is either all zero or operand 0. We can propagate sign bit count // from operand 0. return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); - case RISCVISD::ABSW: { + case RISCVISD::NEGW_MAX: { // We expand this at isel to negw+max. The result will have 33 sign bits // if the input has at least 33 sign bits. 
unsigned Tmp = diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index 7d8a919..cc085bb 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -1455,3 +1455,11 @@ let Predicates = [HasStdExtP, IsRV32] in { def PMAXU_DW : RVPPairBinaryExchanged_rr<0b1111, 0b01, "pmaxu.dw">; def PMAXU_DB : RVPPairBinaryExchanged_rr<0b1111, 0b10, "pmaxu.db">; } // Predicates = [HasStdExtP, IsRV32] + + +//===----------------------------------------------------------------------===// +// Codegen patterns +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtP] in +def : PatGpr<abs, ABS>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 4104abd..f7b4914 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -218,11 +218,13 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0, } let Predicates = [HasVendorXSfvfexpAny], DecoderNamespace = "XSfvector" in { - def SF_VFEXP_V : VALUVs2<0b010011, 0b00111, OPFVV, "sf.vfexp.v">; + def SF_VFEXP_V : VALUVs2<0b010011, 0b00111, OPFVV, "sf.vfexp.v">, + SchedUnaryMC<"WriteSF_VFExp", "ReadSF_VFExp">; } let Predicates = [HasVendorXSfvfexpa], DecoderNamespace = "XSfvector" in { - def SF_VFEXPA_V : VALUVs2<0b010011, 0b00110, OPFVV, "sf.vfexpa.v">; + def SF_VFEXPA_V : VALUVs2<0b010011, 0b00110, OPFVV, "sf.vfexpa.v">, + SchedUnaryMC<"WriteSF_VFExpa", "ReadSF_VFExpa">; } let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvector", @@ -482,11 +484,53 @@ let Predicates = [HasVendorXSfvfwmaccqqq] in { defm SF_VFWMACC_4x4x4 : VPseudoSiFiveVFWMACC; } -let Predicates = [HasVendorXSfvfnrclipxfqf] in { +let Predicates = [HasVendorXSfvfnrclipxfqf], AltFmtType = IS_NOT_ALTFMT in { defm SF_VFNRCLIP_XU_F_QF : VPseudoSiFiveVFNRCLIP; defm SF_VFNRCLIP_X_F_QF : VPseudoSiFiveVFNRCLIP; } 
+class VFExpSchedSEWSet<string mx, bit IsBF16, bit IsApprox> { + defvar BaseSet = SchedSEWSet<mx, isF=1>.val; + list<int> val = !if(IsBF16, !listremove(BaseSet, [32, 64]), + !if(IsApprox, BaseSet, !listremove(BaseSet, [64]))); +} +multiclass VPseudoVFExp_V<bit IsBF16 = false, bit IsApprox = false> { + defvar SchedSuffix = !if(IsApprox, "VFExpa", "VFExp"); + + foreach m = MxListF in { + defvar mx = m.MX; + foreach e = VFExpSchedSEWSet<mx, IsBF16, IsApprox>.val in { + let VLMul = m.value in { + def "_V_" # mx # "_E" # e + : VPseudoUnaryNoMask<m.vrclass, m.vrclass>, + SchedUnary<"WriteSF_" # SchedSuffix, "ReadSF_" # SchedSuffix, + mx, e, forcePassthruRead=true>; + def "_V_" # mx # "_E" # e # "_MASK" + : VPseudoUnaryMask<m.vrclass, m.vrclass>, + RISCVMaskedPseudo<MaskIdx = 2>, + SchedUnary<"WriteSF_" # SchedSuffix, "ReadSF_" # SchedSuffix, + mx, e, forcePassthruRead=true>; + } + } + } +} + +let Predicates = [HasVendorXSfvfbfexp16e], hasSideEffects = 0 in { + let AltFmtType = IS_ALTFMT in { + defm PseudoSF_VFEXP_ALT : VPseudoVFExp_V<IsBF16=true>; + } +} + +let Predicates = [HasVendorXSfvfexpAnyFloat], hasSideEffects = 0 in { + let AltFmtType = IS_NOT_ALTFMT in { + defm PseudoSF_VFEXP : VPseudoVFExp_V; + } +} + +let Predicates = [HasVendorXSfvfexpa], AltFmtType = IS_NOT_ALTFMT in { + defm PseudoSF_VFEXPA : VPseudoVFExp_V<IsApprox=true>; +} + // SDNode def SDT_SF_VC_V_X : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisVT<1, XLenVT>, @@ -893,3 +937,36 @@ let Predicates = [HasVendorXSfcease] in { let rs2 = 0b00101; } } + +let Predicates = [HasVendorXSfvfbfexp16e] in { + defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP_ALT", + AllBF16Vectors, + isSEWAware=1>; +} + +let Predicates = [HasVendorXSfvfexp16e] in { + defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP", + [VF16MF4, VF16MF2, VF16M1, VF16M2, VF16M4, VF16M8], + isSEWAware=1>; +} + +let Predicates = [HasVendorXSfvfexp32e] in { + defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP", + [VF32MF2, VF32M1, 
VF32M2, VF32M4, VF32M8], isSEWAware=1>; +} + +let Predicates = [HasVendorXSfvfexpa] in { + defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA", + [VF32MF2, VF32M1, VF32M2, VF32M4, VF32M8], isSEWAware=1>; +} + +let Predicates = [HasVendorXSfvfexpa, HasVInstructionsF16] in { + defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA", + [VF16MF4, VF16MF2, VF16M1, VF16M2, VF16M4, VF16M8], + isSEWAware=1>; +} + +let Predicates = [HasVendorXSfvfexpa64e] in { + defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA", + [VF64M1, VF64M2, VF64M4, VF64M8], isSEWAware=1>; +} diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 62b7bcd..5429c2a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -51,7 +51,7 @@ def riscv_zip : RVSDNode<"ZIP", SDTIntUnaryOp>; def riscv_unzip : RVSDNode<"UNZIP", SDTIntUnaryOp>; // RV64IZbb absolute value for i32. Expanded to (max (negw X), X) during isel. 
-def riscv_absw : RVSDNode<"ABSW", SDTIntUnaryOp>; +def riscv_negw_max : RVSDNode<"NEGW_MAX", SDTIntUnaryOp>; // Scalar cryptography def riscv_clmul : RVSDNode<"CLMUL", SDTIntBinOp>; @@ -599,37 +599,43 @@ def : PatGpr<riscv_zip, ZIP_RV32, i32>; def : PatGpr<riscv_unzip, UNZIP_RV32, i32>; } // Predicates = [HasStdExtZbkb, IsRV32] -let Predicates = [HasStdExtZbb] in { +let Predicates = [HasStdExtZbbOrP] in { def : PatGpr<ctlz, CLZ>; +} + +let Predicates = [HasStdExtZbb] in { def : PatGpr<cttz, CTZ>; def : PatGpr<ctpop, CPOP>; } // Predicates = [HasStdExtZbb] -let Predicates = [HasStdExtZbb, IsRV64] in { +let Predicates = [HasStdExtZbbOrP, IsRV64] in { def : PatGpr<riscv_clzw, CLZW>; +} + +let Predicates = [HasStdExtZbb, IsRV64] in { def : PatGpr<riscv_ctzw, CTZW>; def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>; -def : Pat<(i64 (riscv_absw GPR:$rs1)), +def : Pat<(i64 (riscv_negw_max GPR:$rs1)), (MAX GPR:$rs1, (XLenVT (SUBW (XLenVT X0), GPR:$rs1)))>; } // Predicates = [HasStdExtZbb, IsRV64] -let Predicates = [HasStdExtZbb] in { +let Predicates = [HasStdExtZbbOrP] in { def : Pat<(XLenVT (sext_inreg GPR:$rs1, i8)), (SEXT_B GPR:$rs1)>; def : Pat<(XLenVT (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>; } // Predicates = [HasStdExtZbb] -let Predicates = [HasStdExtZbb] in { +let Predicates = [HasStdExtZbbOrP] in { def : PatGprGpr<smin, MIN>; def : PatGprGpr<smax, MAX>; def : PatGprGpr<umin, MINU>; def : PatGprGpr<umax, MAXU>; } // Predicates = [HasStdExtZbb] -let Predicates = [HasStdExtZbbOrZbkb, IsRV32] in +let Predicates = [HasStdExtZbbOrZbkbOrP, IsRV32] in def : PatGpr<bswap, REV8_RV32, i32>; -let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in +let Predicates = [HasStdExtZbbOrZbkbOrP, IsRV64] in def : PatGpr<bswap, REV8_RV64, i64>; let Predicates = [HasStdExtZbkb] in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td index f7d1a09..b9c5b75 100644 --- 
a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td @@ -668,4 +668,38 @@ foreach vti = NoGroupBF16Vectors in { def : Pat<(vti.Scalar (extractelt (vti.Vector vti.RegClass:$rs2), 0)), (vfmv_f_s_inst vti.RegClass:$rs2, vti.Log2SEW)>; } + +let Predicates = [HasStdExtZvfbfa] in { + foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in { + defvar fvti = fvtiToFWti.Vti; + defvar fwti = fvtiToFWti.Wti; + def : Pat<(fwti.Vector (any_riscv_fpextend_vl + (fvti.Vector fvti.RegClass:$rs1), + (fvti.Mask VMV0:$vm), + VLOpFrag)), + (!cast<Instruction>("PseudoVFWCVT_F_F_ALT_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") + (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, + (fvti.Mask VMV0:$vm), + GPR:$vl, fvti.Log2SEW, TA_MA)>; + + def : Pat<(fvti.Vector (any_riscv_fpround_vl + (fwti.Vector fwti.RegClass:$rs1), + (fwti.Mask VMV0:$vm), VLOpFrag)), + (!cast<Instruction>("PseudoVFNCVT_F_F_ALT_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") + (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, + (fwti.Mask VMV0:$vm), + // Value to indicate no rounding mode change in + // RISCVInsertReadWriteCSR + FRM_DYN, + GPR:$vl, fvti.Log2SEW, TA_MA)>; + def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))), + (!cast<Instruction>("PseudoVFNCVT_F_F_ALT_W_"#fvti.LMul.MX#"_E"#fvti.SEW) + (fvti.Vector (IMPLICIT_DEF)), + fwti.RegClass:$rs1, + // Value to indicate no rounding mode change in + // RISCVInsertReadWriteCSR + FRM_DYN, + fvti.AVL, fvti.Log2SEW, TA_MA)>; + } +} } // Predicates = [HasStdExtZvfbfa] diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 637d61fe..36a2f46 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -1588,6 +1588,10 @@ multiclass SiFive7SchedResources<int vlen, bit dualVALU, //===----------------------------------------------------------------------===// // Unsupported extensions defm : UnsupportedSchedQ; + // TODO: scheduling 
info of XSfvfexp* and XSfvfexpa* + // for SiFive7 will be added in follow-up patches. + defm : UnsupportedSchedXSfvfexp; + defm : UnsupportedSchedXSfvfexpa; defm : UnsupportedSchedZabha; defm : UnsupportedSchedZbc; defm : UnsupportedSchedZbkb; diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td index 9ab9636..64ccfd8 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -523,6 +523,8 @@ include "RISCVScheduleZvk.td" // Vendor Extensions multiclass UnsupportedSchedXsf { defm : UnsupportedSchedXsfvcp; + defm : UnsupportedSchedXSfvfexp; + defm : UnsupportedSchedXSfvfexpa; defm : UnsupportedSchedXSfvfnrclipxfqf; defm : UnsupportedSchedXSfvfwmaccqqq; defm : UnsupportedSchedXSfvqmaccdod; diff --git a/llvm/lib/Target/RISCV/RISCVScheduleXSf.td b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td index 99632e4..1ee6dc1 100644 --- a/llvm/lib/Target/RISCV/RISCVScheduleXSf.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td @@ -99,3 +99,23 @@ defm : LMULWriteRes<"WriteSF_VFWMACC_QQQ", []>; defm : LMULReadAdvance<"ReadSF_VFWMACC_QQQ", 0>; } // Unsupported = true } + +defm "" : LMULSEWSchedWritesF<"WriteSF_VFExp">; +defm "" : LMULSEWSchedReadsF<"ReadSF_VFExp">; + +multiclass UnsupportedSchedXSfvfexp { +let Unsupported = true in { +defm : LMULSEWWriteResF<"WriteSF_VFExp", []>; +defm : LMULSEWReadAdvanceF<"ReadSF_VFExp", 0>; +} // Unsupported = true +} + +defm "" : LMULSEWSchedWritesF<"WriteSF_VFExpa">; +defm "" : LMULSEWSchedReadsF<"ReadSF_VFExpa">; + +multiclass UnsupportedSchedXSfvfexpa { +let Unsupported = true in { +defm : LMULSEWWriteResF<"WriteSF_VFExpa", []>; +defm : LMULSEWReadAdvanceF<"ReadSF_VFExpa", 0>; +} // Unsupported = true +} diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 334db4b..4b4fc8f 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -187,7 +187,7 @@ public: } bool hasCLZLike() const 
{ - return HasStdExtZbb || HasVendorXTHeadBb || + return HasStdExtZbb || HasStdExtP || HasVendorXTHeadBb || (HasVendorXCVbitmanip && !IsRV64); } bool hasCTZLike() const { @@ -197,7 +197,7 @@ public: return HasStdExtZbb || (HasVendorXCVbitmanip && !IsRV64); } bool hasREV8Like() const { - return HasStdExtZbb || HasStdExtZbkb || HasVendorXTHeadBb; + return HasStdExtZbb || HasStdExtZbkb || HasStdExtP || HasVendorXTHeadBb; } bool hasBEXTILike() const { return HasStdExtZbs || HasVendorXTHeadBs; } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index d91923b..56a38bb 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -1499,18 +1499,25 @@ static bool generateKernelClockInst(const SPIRV::IncomingCall *Call, Register ResultReg = Call->ReturnRegister; - // Deduce the `Scope` operand from the builtin function name. - SPIRV::Scope::Scope ScopeArg = - StringSwitch<SPIRV::Scope::Scope>(Builtin->Name) - .EndsWith("device", SPIRV::Scope::Scope::Device) - .EndsWith("work_group", SPIRV::Scope::Scope::Workgroup) - .EndsWith("sub_group", SPIRV::Scope::Scope::Subgroup); - Register ScopeReg = buildConstantIntReg32(ScopeArg, MIRBuilder, GR); - - MIRBuilder.buildInstr(SPIRV::OpReadClockKHR) - .addDef(ResultReg) - .addUse(GR->getSPIRVTypeID(Call->ReturnType)) - .addUse(ScopeReg); + if (Builtin->Name == "__spirv_ReadClockKHR") { + MIRBuilder.buildInstr(SPIRV::OpReadClockKHR) + .addDef(ResultReg) + .addUse(GR->getSPIRVTypeID(Call->ReturnType)) + .addUse(Call->Arguments[0]); + } else { + // Deduce the `Scope` operand from the builtin function name. 
+ SPIRV::Scope::Scope ScopeArg = + StringSwitch<SPIRV::Scope::Scope>(Builtin->Name) + .EndsWith("device", SPIRV::Scope::Scope::Device) + .EndsWith("work_group", SPIRV::Scope::Scope::Workgroup) + .EndsWith("sub_group", SPIRV::Scope::Scope::Subgroup); + Register ScopeReg = buildConstantIntReg32(ScopeArg, MIRBuilder, GR); + + MIRBuilder.buildInstr(SPIRV::OpReadClockKHR) + .addDef(ResultReg) + .addUse(GR->getSPIRVTypeID(Call->ReturnType)) + .addUse(ScopeReg); + } return true; } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index 3b8764a..c259cce 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -1174,6 +1174,7 @@ defm : DemangledNativeBuiltin<"clock_read_sub_group", OpenCL_std, KernelClock, 0 defm : DemangledNativeBuiltin<"clock_read_hilo_device", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>; defm : DemangledNativeBuiltin<"clock_read_hilo_work_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>; defm : DemangledNativeBuiltin<"clock_read_hilo_sub_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>; +defm : DemangledNativeBuiltin<"__spirv_ReadClockKHR", OpenCL_std, KernelClock, 1, 1, OpReadClockKHR>; //===----------------------------------------------------------------------===// // Class defining an atomic instruction on floating-point numbers. 
diff --git a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp index f7fb886..3ca0b40 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicsSPIRV.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ReplaceConstant.h" #define DEBUG_TYPE "spirv-cbuffer-access" using namespace llvm; @@ -57,6 +58,12 @@ static bool replaceCBufferAccesses(Module &M) { if (!CBufMD) return false; + SmallVector<Constant *> CBufferGlobals; + for (const hlsl::CBufferMapping &Mapping : *CBufMD) + for (const hlsl::CBufferMember &Member : Mapping.Members) + CBufferGlobals.push_back(Member.GV); + convertUsersOfConstantsToInstructions(CBufferGlobals); + for (const hlsl::CBufferMapping &Mapping : *CBufMD) { Instruction *HandleDef = findHandleDef(Mapping.Handle); if (!HandleDef) { @@ -80,12 +87,7 @@ static bool replaceCBufferAccesses(Module &M) { Value *GetPointerCall = Builder.CreateIntrinsic( PtrType, Intrinsic::spv_resource_getpointer, {HandleDef, IndexVal}); - // We cannot use replaceAllUsesWith here because some uses may be - // ConstantExprs, which cannot be replaced with non-constants. 
- SmallVector<User *, 4> Users(MemberGV->users()); - for (User *U : Users) { - U->replaceUsesOfWith(MemberGV, GetPointerCall); - } + MemberGV->replaceAllUsesWith(GetPointerCall); } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index f0ac26b..14097d7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1336,22 +1336,25 @@ def pmax : PatFrags<(ops node:$lhs, node:$rhs), [ ]>; defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>; +multiclass PMinMaxInt<Vec vec, NI baseMinInst, NI baseMaxInst> { + def : Pat<(vec.int_vt (vselect + (setolt (vec.vt (bitconvert V128:$rhs)), + (vec.vt (bitconvert V128:$lhs))), + V128:$rhs, V128:$lhs)), + (baseMinInst $lhs, $rhs)>; + def : Pat<(vec.int_vt (vselect + (setolt (vec.vt (bitconvert V128:$lhs)), + (vec.vt (bitconvert V128:$rhs))), + V128:$rhs, V128:$lhs)), + (baseMaxInst $lhs, $rhs)>; +} // Also match the pmin/pmax cases where the operands are int vectors (but the // comparison is still a floating point comparison). This can happen when using // the wasm_simd128.h intrinsics because v128_t is an integer vector. 
foreach vec = [F32x4, F64x2, F16x8] in { -defvar pmin = !cast<NI>("PMIN_"#vec); -defvar pmax = !cast<NI>("PMAX_"#vec); -def : Pat<(vec.int_vt (vselect - (setolt (vec.vt (bitconvert V128:$rhs)), - (vec.vt (bitconvert V128:$lhs))), - V128:$rhs, V128:$lhs)), - (pmin $lhs, $rhs)>; -def : Pat<(vec.int_vt (vselect - (setolt (vec.vt (bitconvert V128:$lhs)), - (vec.vt (bitconvert V128:$rhs))), - V128:$rhs, V128:$lhs)), - (pmax $lhs, $rhs)>; + defvar pmin = !cast<NI>("PMIN_"#vec); + defvar pmax = !cast<NI>("PMAX_"#vec); + defm : PMinMaxInt<vec, pmin, pmax>; } // And match the pmin/pmax LLVM intrinsics as well @@ -1756,6 +1759,15 @@ let Predicates = [HasRelaxedSIMD] in { (relaxed_max V128:$lhs, V128:$rhs)>; def : Pat<(vec.vt (fmaximumnum (vec.vt V128:$lhs), (vec.vt V128:$rhs))), (relaxed_max V128:$lhs, V128:$rhs)>; + + // Transform pmin/max-supposed patterns to relaxed min max + let AddedComplexity = 1 in { + def : Pat<(vec.vt (pmin (vec.vt V128:$lhs), (vec.vt V128:$rhs))), + (relaxed_min $lhs, $rhs)>; + def : Pat<(vec.vt (pmax (vec.vt V128:$lhs), (vec.vt V128:$rhs))), + (relaxed_max $lhs, $rhs)>; + defm : PMinMaxInt<vec, relaxed_min, relaxed_max>; + } } } diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 6261fad..706ab2b 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -160,6 +160,14 @@ FunctionPass *createX86PartialReductionPass(); /// // Analyzes and emits pseudos to support Win x64 Unwind V2. FunctionPass *createX86WinEHUnwindV2Pass(); +/// The pass transforms load/store <256 x i32> to AMX load/store intrinsics +/// or split the data to two <128 x i32>. +FunctionPass *createX86LowerAMXTypePass(); + +/// The pass transforms amx intrinsics to scalar operation if the function has +/// optnone attribute or it is O0. 
+FunctionPass *createX86LowerAMXIntrinsicsPass(); + InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &, const X86RegisterBankInfo &); diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 62073ec..4393f6e 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -4721,9 +4721,6 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { if (!(Subtarget->hasVLX() || NVT.is512BitVector())) return false; - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - auto getFoldableLogicOp = [](SDValue Op) { // Peek through single use bitcast. if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse()) @@ -4740,13 +4737,47 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { return SDValue(); }; - SDValue A, FoldableOp; - if ((FoldableOp = getFoldableLogicOp(N1))) { - A = N0; - } else if ((FoldableOp = getFoldableLogicOp(N0))) { - A = N1; - } else - return false; + SDValue N0, N1, A, FoldableOp; + + // Identify and (optionally) peel an outer NOT that wraps a pure logic tree + auto tryPeelOuterNotWrappingLogic = [&](SDNode *Op) { + if (Op->getOpcode() == ISD::XOR && Op->hasOneUse() && + ISD::isBuildVectorAllOnes(Op->getOperand(1).getNode())) { + SDValue InnerOp = Op->getOperand(0); + + if (!getFoldableLogicOp(InnerOp)) + return SDValue(); + + N0 = InnerOp.getOperand(0); + N1 = InnerOp.getOperand(1); + if ((FoldableOp = getFoldableLogicOp(N1))) { + A = N0; + return InnerOp; + } + if ((FoldableOp = getFoldableLogicOp(N0))) { + A = N1; + return InnerOp; + } + } + return SDValue(); + }; + + bool PeeledOuterNot = false; + SDNode *OriN = N; + if (SDValue InnerOp = tryPeelOuterNotWrappingLogic(N)) { + PeeledOuterNot = true; + N = InnerOp.getNode(); + } else { + N0 = N->getOperand(0); + N1 = N->getOperand(1); + + if ((FoldableOp = getFoldableLogicOp(N1))) + A = N0; + else if ((FoldableOp = getFoldableLogicOp(N0))) + A = N1; + else + return false; + } 
SDValue B = FoldableOp.getOperand(0); SDValue C = FoldableOp.getOperand(1); @@ -4798,7 +4829,10 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { case ISD::XOR: Imm ^= TernlogMagicA; break; } - return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm); + if (PeeledOuterNot) + Imm = ~Imm; + + return matchVPTERNLOG(OriN, ParentA, ParentB, ParentC, A, B, C, Imm); } /// If the high bits of an 'and' operand are known zero, try setting the diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d49f25a..410f20e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2632,6 +2632,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, @@ -57613,10 +57617,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, } // Fold any similar generic ADD/SUB opcodes to reuse this node. - auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) { + auto MatchGeneric = [&](unsigned Opc, SDValue N0, SDValue N1, bool Negate) { SDValue Ops[] = {N0, N1}; SDVTList VTs = DAG.getVTList(N->getValueType(0)); - if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) { + if (SDNode *GenericAddSub = DAG.getNodeIfExists(Opc, VTs, Ops)) { SDValue Op(N, 0); if (Negate) { // Bail if this is only used by a user of the x86 add/sub. 
@@ -57628,8 +57632,25 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, DCI.CombineTo(GenericAddSub, Op); } }; - MatchGeneric(LHS, RHS, false); - MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode()); + MatchGeneric(GenericOpc, LHS, RHS, false); + MatchGeneric(GenericOpc, RHS, LHS, X86ISD::SUB == N->getOpcode()); + + if (auto *Const = dyn_cast<ConstantSDNode>(RHS)) { + SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT); + if (X86ISD::SUB == N->getOpcode()) { + // Fold generic add(LHS, -C) to X86ISD::SUB(LHS, C). + MatchGeneric(ISD::ADD, LHS, NegC, false); + } else { + // Negate X86ISD::ADD(LHS, C) and replace generic sub(-C, LHS). + MatchGeneric(ISD::SUB, NegC, LHS, true); + } + } else if (auto *Const = dyn_cast<ConstantSDNode>(LHS)) { + if (X86ISD::SUB == N->getOpcode()) { + SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT); + // Negate X86ISD::SUB(C, RHS) and replace generic add(RHS, -C). + MatchGeneric(ISD::ADD, RHS, NegC, true); + } + } // TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the // EFLAGS result doesn't change. 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index e28b9c1..b7151f6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1592,7 +1592,6 @@ namespace llvm { bool useLoadStackGuardNode(const Module &M) const override; bool useStackGuardXorFP() const override; void insertSSPDeclarations(Module &M) const override; - Function *getSSPStackGuardCheck(const Module &M) const override; SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, const SDLoc &DL) const override; diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index 37d7772..a61bbe5 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -640,15 +640,6 @@ void X86TargetLowering::insertSSPDeclarations(Module &M) const { TargetLowering::insertSSPDeclarations(M); } -Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const { - // MSVC CRT has a function to validate security cookie. - if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || - Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { - return M.getFunction("__security_check_cookie"); - } - return TargetLowering::getSSPStackGuardCheck(M); -} - Value * X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const { // Android provides a fixed TLS slot for the SafeStack pointer. 
See the diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td index edcf247..632c6a2 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td @@ -1407,7 +1407,7 @@ let isBarrier = 1, isTerminator = 1 in { let r = 0x04; } - def BREAK_N : RRRN_Inst<0x0C, (outs), (ins uimm4:$imm), + def BREAK_N : RRRN_Inst<0x0D, (outs), (ins uimm4:$imm), "break.n\t$imm", []>, Requires<[HasDensity, HasDebug]> { bits<4> imm; diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp index 0fce5b9..709e5f0 100644 --- a/llvm/lib/TargetParser/ARMTargetParser.cpp +++ b/llvm/lib/TargetParser/ARMTargetParser.cpp @@ -88,6 +88,7 @@ unsigned ARM::parseArchVersion(StringRef Arch) { case ArchKind::ARMV9_4A: case ArchKind::ARMV9_5A: case ArchKind::ARMV9_6A: + case ArchKind::ARMV9_7A: return 9; case ArchKind::INVALID: return 0; @@ -127,6 +128,7 @@ static ARM::ProfileKind getProfileKind(ARM::ArchKind AK) { case ARM::ArchKind::ARMV9_4A: case ARM::ArchKind::ARMV9_5A: case ARM::ArchKind::ARMV9_6A: + case ARM::ArchKind::ARMV9_7A: return ARM::ProfileKind::A; case ARM::ArchKind::ARMV4: case ARM::ArchKind::ARMV4T: diff --git a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp index f6cea85..15ba1eb 100644 --- a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp +++ b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp @@ -46,6 +46,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) { .Case("v9.4a", "v9.4-a") .Case("v9.5a", "v9.5-a") .Case("v9.6a", "v9.6-a") + .Case("v9.7a", "v9.7-a") .Case("v8m.base", "v8-m.base") .Case("v8m.main", "v8-m.main") .Case("v8.1m.main", "v8.1-m.main") diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 62a3c88..975a271 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -433,6 +433,8 @@ static void 
fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["fp8e5m3-insts"] = true; Features["permlane16-swap"] = true; Features["ashr-pk-insts"] = true; + Features["add-min-max-insts"] = true; + Features["pk-add-min-max-insts"] = true; Features["atomic-buffer-pk-add-bf16-inst"] = true; Features["vmem-pref-insts"] = true; Features["atomic-fadd-rtn-insts"] = true; diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index 1068ce4..11ba9ee 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -937,6 +937,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { return Triple::ARMSubArch_v9_5a; case ARM::ArchKind::ARMV9_6A: return Triple::ARMSubArch_v9_6a; + case ARM::ArchKind::ARMV9_7A: + return Triple::ARMSubArch_v9_7a; case ARM::ArchKind::ARMV8R: return Triple::ARMSubArch_v8r; case ARM::ArchKind::ARMV8MBaseline: diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index bbbac45..7a95df4 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -907,10 +907,20 @@ static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts, StoreInst *Store = Builder.CreateAlignedStore( Val, First.Store->getPointerOperand(), First.Store->getAlign()); + // Merge various metadata onto the new store. 
AAMDNodes AATags = First.Store->getAAMetadata(); - for (const PartStore &Part : drop_begin(Parts)) + SmallVector<Instruction *> Stores = {First.Store}; + Stores.reserve(Parts.size()); + SmallVector<DebugLoc> DbgLocs = {First.Store->getDebugLoc()}; + DbgLocs.reserve(Parts.size()); + for (const PartStore &Part : drop_begin(Parts)) { AATags = AATags.concat(Part.Store->getAAMetadata()); + Stores.push_back(Part.Store); + DbgLocs.push_back(Part.Store->getDebugLoc()); + } Store->setAAMetadata(AATags); + Store->mergeDIAssignID(Stores); + Store->setDebugLoc(DebugLoc::getMergedLocations(DbgLocs)); // Remove the old stores. for (const PartStore &Part : Parts) diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index a0f7ec6..2dd0fde 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -948,17 +948,17 @@ void llvm::updateVCallVisibilityInIndex( // linker, as we have no information on their eventual use. if (DynamicExportSymbols.count(P.first)) continue; + // With validation enabled, we want to exclude symbols visible to regular + // objects. Local symbols will be in this group due to the current + // implementation but those with VCallVisibilityTranslationUnit will have + // already been marked in clang so are unaffected. + if (VisibleToRegularObjSymbols.count(P.first)) + continue; for (auto &S : P.second.getSummaryList()) { auto *GVar = dyn_cast<GlobalVarSummary>(S.get()); if (!GVar || GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic) continue; - // With validation enabled, we want to exclude symbols visible to regular - // objects. Local symbols will be in this group due to the current - // implementation but those with VCallVisibilityTranslationUnit will have - // already been marked in clang so are unaffected. 
- if (VisibleToRegularObjSymbols.count(P.first)) - continue; GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit); } } @@ -1161,14 +1161,10 @@ bool DevirtIndex::tryFindVirtualCallTargets( // and therefore the same GUID. This can happen if there isn't enough // distinguishing path when compiling the source file. In that case we // conservatively return false early. + if (P.VTableVI.hasLocal() && P.VTableVI.getSummaryList().size() > 1) + return false; const GlobalVarSummary *VS = nullptr; - bool LocalFound = false; for (const auto &S : P.VTableVI.getSummaryList()) { - if (GlobalValue::isLocalLinkage(S->linkage())) { - if (LocalFound) - return false; - LocalFound = true; - } auto *CurVS = cast<GlobalVarSummary>(S->getBaseObject()); if (!CurVS->vTableFuncs().empty() || // Previously clang did not attach the necessary type metadata to @@ -1184,6 +1180,7 @@ bool DevirtIndex::tryFindVirtualCallTargets( // with public LTO visibility. if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic) return false; + break; } } // There will be no VS if all copies are available_externally having no @@ -1411,9 +1408,8 @@ bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot, // If the summary list contains multiple summaries where at least one is // a local, give up, as we won't know which (possibly promoted) name to use. - for (const auto &S : TheFn.getSummaryList()) - if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1) - return false; + if (TheFn.hasLocal() && Size > 1) + return false; // Collect functions devirtualized at least for one call site for stats. if (PrintSummaryDevirt || AreStatisticsEnabled()) @@ -2591,6 +2587,11 @@ void DevirtIndex::run() { if (ExportSummary.typeIdCompatibleVtableMap().empty()) return; + // Assert that we haven't made any changes that would affect the hasLocal() + // flag on the GUID summary info. 
+ assert(!ExportSummary.withInternalizeAndPromote() && + "Expect index-based WPD to run before internalization and promotion"); + DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID; for (const auto &P : ExportSummary.typeIdCompatibleVtableMap()) { NameByGUID[GlobalValue::getGUIDAssumingExternalLinkage(P.first)].push_back( diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 73ec451..9bee523 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2760,21 +2760,34 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". Value *LHSOp, *RHSOp; - if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && - match(Op1, m_PtrToInt(m_Value(RHSOp)))) + if (match(Op0, m_PtrToIntOrAddr(m_Value(LHSOp))) && + match(Op1, m_PtrToIntOrAddr(m_Value(RHSOp)))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(), I.hasNoUnsignedWrap())) return replaceInstUsesWith(I, Res); // trunc(p)-trunc(q) -> trunc(p-q) - if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && - match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) + if (match(Op0, m_Trunc(m_PtrToIntOrAddr(m_Value(LHSOp)))) && + match(Op1, m_Trunc(m_PtrToIntOrAddr(m_Value(RHSOp))))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(), /* IsNUW */ false)) return replaceInstUsesWith(I, Res); - if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) && - match(Op1, m_ZExtOrSelf(m_PtrToInt(m_Value(RHSOp))))) { + auto MatchSubOfZExtOfPtrToIntOrAddr = [&]() { + if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) && + match(Op1, m_ZExt(m_PtrToIntSameSize(DL, m_Value(RHSOp))))) + return true; + if (match(Op0, m_ZExt(m_PtrToAddr(m_Value(LHSOp)))) && + match(Op1, m_ZExt(m_PtrToAddr(m_Value(RHSOp))))) + return true; + // 
Special case for non-canonical ptrtoint in constant expression, + // where the zext has been folded into the ptrtoint. + if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) && + match(Op1, m_PtrToInt(m_Value(RHSOp)))) + return true; + return false; + }; + if (MatchSubOfZExtOfPtrToIntOrAddr()) { if (auto *GEP = dyn_cast<GEPOperator>(LHSOp)) { if (GEP->getPointerOperand() == RHSOp) { if (GEP->hasNoUnsignedWrap() || GEP->hasNoUnsignedSignedWrap()) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index dab200d..8d9933b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -582,6 +582,18 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1); return BinaryOperator::CreateSub(ConstCtlz, X); } + + // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false) + if (Op0->hasOneUse() && + match(Op0, + m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) { + Type *Ty = II.getType(); + unsigned BitWidth = Ty->getScalarSizeInBits(); + auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty, + {X, IC.Builder.getFalse()}); + auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth)); + return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz)); + } } // cttz(Pow2) -> Log2(Pow2) @@ -4003,18 +4015,29 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // Try to fold intrinsic into select/phi operands. This is legal if: // * The intrinsic is speculatable. - // * The select condition is not a vector, or the intrinsic does not - // perform cross-lane operations. - if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI) && - isNotCrossLaneOperation(II)) + // * The operand is one of the following: + // - a phi. + // - a select with a scalar condition. 
+ // - a select with a vector condition and II is not a cross lane operation. + if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI)) { for (Value *Op : II->args()) { - if (auto *Sel = dyn_cast<SelectInst>(Op)) - if (Instruction *R = FoldOpIntoSelect(*II, Sel)) + if (auto *Sel = dyn_cast<SelectInst>(Op)) { + bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy(); + if (IsVectorCond && !isNotCrossLaneOperation(II)) + continue; + // Don't replace a scalar select with a more expensive vector select if + // we can't simplify both arms of the select. + bool SimplifyBothArms = + !Op->getType()->isVectorTy() && II->getType()->isVectorTy(); + if (Instruction *R = FoldOpIntoSelect( + *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms)) return R; + } if (auto *Phi = dyn_cast<PHINode>(Op)) if (Instruction *R = foldOpIntoPhi(*II, Phi)) return R; } + } if (Instruction *Shuf = foldShuffledIntrinsicOperands(II)) return Shuf; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 943c223..ede73f8 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -664,7 +664,8 @@ public: /// This also works for Cast instructions, which obviously do not have a /// second operand. Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI, - bool FoldWithMultiUse = false); + bool FoldWithMultiUse = false, + bool SimplifyBothArms = false); /// This is a convenience wrapper function for the above two functions. 
Instruction *foldBinOpIntoSelectOrPhi(BinaryOperator &I); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 5aa8de3..f5130da 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -4697,5 +4697,31 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { cast<IntrinsicInst>(TrueVal)->getParamAlign(0).valueOrOne(), CondVal, FalseVal)); + // Canonicalize sign function ashr pattern: select (icmp slt X, 1), ashr X, + // bitwidth-1, 1 -> scmp(X, 0) + // Also handles: select (icmp sgt X, 0), 1, ashr X, bitwidth-1 -> scmp(X, 0) + unsigned BitWidth = SI.getType()->getScalarSizeInBits(); + CmpPredicate Pred; + Value *CmpLHS, *CmpRHS; + + // Canonicalize sign function ashr patterns: + // select (icmp slt X, 1), ashr X, bitwidth-1, 1 -> scmp(X, 0) + // select (icmp sgt X, 0), 1, ashr X, bitwidth-1 -> scmp(X, 0) + if (match(&SI, m_Select(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)), + m_Value(TrueVal), m_Value(FalseVal))) && + ((Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_One()) && + match(TrueVal, + m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1))) && + match(FalseVal, m_One())) || + (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_Zero()) && + match(TrueVal, m_One()) && + match(FalseVal, + m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1)))))) { + + Function *Scmp = Intrinsic::getOrInsertDeclaration( + SI.getModule(), Intrinsic::scmp, {SI.getType(), SI.getType()}); + return CallInst::Create(Scmp, {CmpLHS, ConstantInt::get(SI.getType(), 0)}); + } + return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 3f11cae..9c8de45 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1777,7 +1777,8 @@ static Value 
*foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, } Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, - bool FoldWithMultiUse) { + bool FoldWithMultiUse, + bool SimplifyBothArms) { // Don't modify shared select instructions unless set FoldWithMultiUse if (!SI->hasOneUse() && !FoldWithMultiUse) return nullptr; @@ -1821,6 +1822,9 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, if (!NewTV && !NewFV) return nullptr; + if (SimplifyBothArms && !(NewTV && NewFV)) + return nullptr; + // Create an instruction for the arm that did not fold. if (!NewTV) NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this); @@ -2323,6 +2327,18 @@ Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C, return ConstantVector::get(NewVecC); } +// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS). +static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector, + Constant *Splat, bool SplatLHS, + const DataLayout &DL) { + ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount(); + Constant *LHS = ConstantVector::getSplat(EC, Splat); + Constant *RHS = Vector; + if (!SplatLHS) + std::swap(LHS, RHS); + return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL); +} + Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { if (!isa<VectorType>(Inst.getType())) return nullptr; @@ -2334,6 +2350,37 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { assert(cast<VectorType>(RHS->getType())->getElementCount() == cast<VectorType>(Inst.getType())->getElementCount()); + auto foldConstantsThroughSubVectorInsertSplat = + [&](Value *MaybeSubVector, Value *MaybeSplat, + bool SplatLHS) -> Instruction * { + Value *Idx; + Constant *Splat, *SubVector, *Dest; + if (!match(MaybeSplat, m_ConstantSplat(m_Constant(Splat))) || + !match(MaybeSubVector, + m_VectorInsert(m_Constant(Dest), m_Constant(SubVector), + m_Value(Idx)))) + 
return nullptr; + SubVector = + constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL); + Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL); + if (!SubVector || !Dest) + return nullptr; + auto *InsertVector = + Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx); + return replaceInstUsesWith(Inst, InsertVector); + }; + + // If one operand is a constant splat and the other operand is a + // `vector.insert` where both the destination and subvector are constant, + // apply the operation to both the destination and subvector, returning a new + // constant `vector.insert`. This helps constant folding for scalable vectors. + if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat( + /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false)) + return Folded; + if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat( + /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true)) + return Folded; + // If both operands of the binop are vector concatenations, then perform the // narrow binop on each pair of the source operands followed by concatenation // of the results. diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index b6cbecb..10b03bb 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -226,6 +226,7 @@ static const Align kMinOriginAlignment = Align(4); static const Align kShadowTLSAlignment = Align(8); // These constants must be kept in sync with the ones in msan.h. 
+// TODO: increase size to match SVE/SVE2/SME/SME2 limits static const unsigned kParamTLSSize = 800; static const unsigned kRetvalTLSSize = 800; @@ -1544,6 +1545,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } } + static bool isAArch64SVCount(Type *Ty) { + if (TargetExtType *TTy = dyn_cast<TargetExtType>(Ty)) + return TTy->getName() == "aarch64.svcount"; + return false; + } + + // This is intended to match the "AArch64 Predicate-as-Counter Type" (aka + // 'target("aarch64.svcount")', but not e.g., <vscale x 4 x i32>. + static bool isScalableNonVectorType(Type *Ty) { + if (!isAArch64SVCount(Ty)) + LLVM_DEBUG(dbgs() << "isScalableNonVectorType: Unexpected type " << *Ty + << "\n"); + + return Ty->isScalableTy() && !isa<VectorType>(Ty); + } + void materializeChecks() { #ifndef NDEBUG // For assert below. @@ -1672,6 +1689,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n"); return Res; } + if (isScalableNonVectorType(OrigTy)) { + LLVM_DEBUG(dbgs() << "getShadowTy: Scalable non-vector type: " << *OrigTy + << "\n"); + return OrigTy; + } + uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy); return IntegerType::get(*MS.C, TypeSize); } @@ -2185,8 +2208,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { << *OrigIns << "\n"); return; } -#ifndef NDEBUG + Type *ShadowTy = Shadow->getType(); + if (isScalableNonVectorType(ShadowTy)) { + LLVM_DEBUG(dbgs() << "Skipping check of scalable non-vector " << *Shadow + << " before " << *OrigIns << "\n"); + return; + } +#ifndef NDEBUG assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) || isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) && "Can only insert checks for integer, vector, and aggregate shadow " @@ -6972,6 +7001,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // an extra "select". 
This results in much more compact IR. // Sa = select Sb, poisoned, (select b, Sc, Sd) Sa1 = getPoisonedShadow(getShadowTy(I.getType())); + } else if (isScalableNonVectorType(I.getType())) { + // This is intended to handle target("aarch64.svcount"), which can't be + // handled in the else branch because of incompatibility with CreateXor + // ("The supported LLVM operations on this type are limited to load, + // store, phi, select and alloca instructions"). + + // TODO: this currently underapproximates. Use Arm SVE EOR in the else + // branch as needed instead. + Sa1 = getCleanShadow(getShadowTy(I.getType())); } else { // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ] // If Sb (condition is poisoned), look for bits in c and d that are equal diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 4acc3f2..d347ced 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -614,6 +614,16 @@ static Decomposition decompose(Value *V, return {V, IsKnownNonNegative}; } + if (match(V, m_Add(m_Value(Op0), m_ConstantInt(CI))) && CI->isNegative() && + canUseSExt(CI)) { + Preconditions.emplace_back( + CmpInst::ICMP_UGE, Op0, + ConstantInt::get(Op0->getType(), CI->getSExtValue() * -1)); + if (auto Decomp = MergeResults(Op0, CI, true)) + return *Decomp; + return {V, IsKnownNonNegative}; + } + if (match(V, m_NSWAdd(m_Value(Op0), m_Value(Op1)))) { if (!isKnownNonNegative(Op0, DL)) Preconditions.emplace_back(CmpInst::ICMP_SGE, Op0, @@ -627,16 +637,6 @@ static Decomposition decompose(Value *V, return {V, IsKnownNonNegative}; } - if (match(V, m_Add(m_Value(Op0), m_ConstantInt(CI))) && CI->isNegative() && - canUseSExt(CI)) { - Preconditions.emplace_back( - CmpInst::ICMP_UGE, Op0, - ConstantInt::get(Op0->getType(), CI->getSExtValue() * -1)); - if (auto Decomp = MergeResults(Op0, CI, true)) - return *Decomp; - return {V, IsKnownNonNegative}; - } 
- // Decompose or as an add if there are no common bits between the operands. if (match(V, m_DisjointOr(m_Value(Op0), m_ConstantInt(CI)))) { if (auto Decomp = MergeResults(Op0, CI, IsSigned)) diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp index a83cbd17a7..f273e9d 100644 --- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp +++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp @@ -64,10 +64,10 @@ using namespace llvm; -namespace { - #define DEBUG_TYPE "mergeicmps" +namespace { + // A BCE atom "Binary Compare Expression Atom" represents an integer load // that is a constant offset from a base value, e.g. `a` or `o.c` in the example // at the top. @@ -128,11 +128,12 @@ private: unsigned Order = 1; DenseMap<const Value*, int> BaseToIndex; }; +} // namespace // If this value is a load from a constant offset w.r.t. a base address, and // there are no other users of the load or address, returns the base address and // the offset. -BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) { +static BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) { auto *const LoadI = dyn_cast<LoadInst>(Val); if (!LoadI) return {}; @@ -175,6 +176,7 @@ BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) { return BCEAtom(GEP, LoadI, BaseId.getBaseId(Base), Offset); } +namespace { // A comparison between two BCE atoms, e.g. `a == o.a` in the example at the // top. // Note: the terminology is misleading: the comparison is symmetric, so there @@ -239,6 +241,7 @@ class BCECmpBlock { private: BCECmp Cmp; }; +} // namespace bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst, AliasAnalysis &AA) const { @@ -302,9 +305,9 @@ bool BCECmpBlock::doesOtherWork() const { // Visit the given comparison. If this is a comparison between two valid // BCE atoms, returns the comparison. 
-std::optional<BCECmp> visitICmp(const ICmpInst *const CmpI, - const ICmpInst::Predicate ExpectedPredicate, - BaseIdentifier &BaseId) { +static std::optional<BCECmp> +visitICmp(const ICmpInst *const CmpI, + const ICmpInst::Predicate ExpectedPredicate, BaseIdentifier &BaseId) { // The comparison can only be used once: // - For intermediate blocks, as a branch condition. // - For the final block, as an incoming value for the Phi. @@ -332,10 +335,9 @@ std::optional<BCECmp> visitICmp(const ICmpInst *const CmpI, // Visit the given comparison block. If this is a comparison between two valid // BCE atoms, returns the comparison. -std::optional<BCECmpBlock> visitCmpBlock(Value *const Val, - BasicBlock *const Block, - const BasicBlock *const PhiBlock, - BaseIdentifier &BaseId) { +static std::optional<BCECmpBlock> +visitCmpBlock(Value *const Val, BasicBlock *const Block, + const BasicBlock *const PhiBlock, BaseIdentifier &BaseId) { if (Block->empty()) return std::nullopt; auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator()); @@ -397,6 +399,7 @@ static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons, Comparisons.push_back(std::move(Comparison)); } +namespace { // A chain of comparisons. class BCECmpChain { public: @@ -420,6 +423,7 @@ private: // The original entry block (before sorting); BasicBlock *EntryBlock_; }; +} // namespace static bool areContiguous(const BCECmpBlock &First, const BCECmpBlock &Second) { return First.Lhs().BaseId == Second.Lhs().BaseId && @@ -742,9 +746,8 @@ bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA, return true; } -std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi, - BasicBlock *const LastBlock, - int NumBlocks) { +static std::vector<BasicBlock *> +getOrderedBlocks(PHINode &Phi, BasicBlock *const LastBlock, int NumBlocks) { // Walk up from the last block to find other blocks. 
std::vector<BasicBlock *> Blocks(NumBlocks); assert(LastBlock && "invalid last block"); @@ -777,8 +780,8 @@ std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi, return Blocks; } -bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, AliasAnalysis &AA, - DomTreeUpdater &DTU) { +static bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, + AliasAnalysis &AA, DomTreeUpdater &DTU) { LLVM_DEBUG(dbgs() << "processPhi()\n"); if (Phi.getNumIncomingValues() <= 1) { LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n"); @@ -874,6 +877,7 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI, return MadeChange; } +namespace { class MergeICmpsLegacyPass : public FunctionPass { public: static char ID; diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 8714741a..9829d4d 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -1793,3 +1793,13 @@ bool llvm::hasOnlySimpleTerminator(const Function &F) { } return true; } + +Printable llvm::printBasicBlock(const BasicBlock *BB) { + return Printable([BB](raw_ostream &OS) { + if (!BB) { + OS << "<nullptr>"; + return; + } + BB->printAsOperand(OS); + }); +} diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp index 978d5a2..371d9e6 100644 --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -260,9 +260,16 @@ bool PredicateInfoBuilder::stackIsInScope(const ValueDFSStack &Stack, // next to the defs they must go with so that we can know it's time to pop // the stack when we hit the end of the phi uses for a given def. 
const ValueDFS &Top = *Stack.back().V; - if (Top.LocalNum == LN_Last && Top.PInfo) { - if (!VDUse.U) - return false; + assert(Top.PInfo && "RenameStack should only contain predicate infos (defs)"); + if (Top.LocalNum == LN_Last) { + if (!VDUse.U) { + assert(VDUse.PInfo && "A non-use VDUse should have a predicate info"); + // We should reserve adjacent LN_Last defs for the same phi use. + return VDUse.LocalNum == LN_Last && + // If the two phi defs have the same edge, they must be designated + // for the same succ BB. + getBlockEdge(Top.PInfo) == getBlockEdge(VDUse.PInfo); + } auto *PHI = dyn_cast<PHINode>(VDUse.U->getUser()); if (!PHI) return false; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index adf27be..3356516 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7231,6 +7231,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( return DenseMap<const SCEV *, Value *>(); } + VPlanTransforms::narrowInterleaveGroups( + BestVPlan, BestVF, + TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)); VPlanTransforms::removeDeadRecipes(BestVPlan); VPlanTransforms::convertToConcreteRecipes(BestVPlan); @@ -8199,10 +8202,6 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, if (CM.foldTailWithEVL()) VPlanTransforms::runPass(VPlanTransforms::addExplicitVectorLength, *Plan, CM.getMaxSafeElements()); - - if (auto P = VPlanTransforms::narrowInterleaveGroups(*Plan, TTI)) - VPlans.push_back(std::move(P)); - assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid"); VPlans.push_back(std::move(Plan)); } @@ -9860,6 +9859,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Get user vectorization factor and interleave count. 
ElementCount UserVF = Hints.getWidth(); unsigned UserIC = Hints.getInterleave(); + if (UserIC > 1 && !LVL.isSafeForAnyVectorWidth()) + UserIC = 1; // Plan how to best vectorize. LVP.plan(UserVF, UserIC); @@ -9924,7 +9925,15 @@ bool LoopVectorizePass::processLoop(Loop *L) { VectorizeLoop = false; } - if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) { + if (UserIC == 1 && Hints.getInterleave() > 1) { + assert(!LVL.isSafeForAnyVectorWidth() && + "UserIC should only be ignored due to unsafe dependencies"); + LLVM_DEBUG(dbgs() << "LV: Ignoring user-specified interleave count.\n"); + IntDiagMsg = {"InterleavingUnsafe", + "Ignoring user-specified interleave count due to possibly " + "unsafe dependencies in the loop."}; + InterleaveLoop = false; + } else if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) { // Tell the user interleaving was avoided up-front, despite being explicitly // requested. LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and " diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index c95c887..428a8f4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1191,7 +1191,6 @@ VPlan *VPlan::duplicate() { } Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount; Old2NewVPValues[&VF] = &NewPlan->VF; - Old2NewVPValues[&UF] = &NewPlan->UF; Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF; if (BackedgeTakenCount) { NewPlan->BackedgeTakenCount = new VPValue(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 167ba55..2591df8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2712,7 +2712,8 @@ public: static inline bool classof(const VPRecipeBase *R) { return R->getVPDefID() == VPRecipeBase::VPReductionSC || - R->getVPDefID() == VPRecipeBase::VPReductionEVLSC; + R->getVPDefID() == VPRecipeBase::VPReductionEVLSC || + R->getVPDefID() == 
VPRecipeBase::VPPartialReductionSC; } static inline bool classof(const VPUser *U) { @@ -2783,7 +2784,10 @@ public: Opcode(Opcode), VFScaleFactor(ScaleFactor) { [[maybe_unused]] auto *AccumulatorRecipe = getChainOp()->getDefiningRecipe(); - assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) || + // When cloning as part of a VPExpressionRecipe the chain op could have + // replaced by a temporary VPValue, so it doesn't have a defining recipe. + assert((!AccumulatorRecipe || + isa<VPReductionPHIRecipe>(AccumulatorRecipe) || isa<VPPartialReductionRecipe>(AccumulatorRecipe)) && "Unexpected operand order for partial reduction recipe"); } @@ -3093,6 +3097,11 @@ public: /// removed before codegen. void decompose(); + unsigned getVFScaleFactor() const { + auto *PR = dyn_cast<VPPartialReductionRecipe>(ExpressionRecipes.back()); + return PR ? PR->getVFScaleFactor() : 1; + } + /// Method for generating code, must not be called as this recipe is abstract. void execute(VPTransformState &State) override { llvm_unreachable("recipe must be removed before execute"); @@ -4152,9 +4161,6 @@ class VPlan { /// Represents the vectorization factor of the loop. VPValue VF; - /// Represents the symbolic unroll factor of the loop. - VPValue UF; - /// Represents the loop-invariant VF * UF of the vector loop region. VPValue VFxUF; @@ -4166,11 +4172,6 @@ class VPlan { /// definitions are VPValues that hold a pointer to their underlying IR. SmallVector<VPValue *, 16> VPLiveIns; - /// Mapping from SCEVs to the VPValues representing their expansions. - /// NOTE: This mapping is temporary and will be removed once all users have - /// been modeled in VPlan directly. - DenseMap<const SCEV *, VPValue *> SCEVToExpansion; - /// Blocks allocated and owned by the VPlan. They will be deleted once the /// VPlan is destroyed. 
SmallVector<VPBlockBase *> CreatedBlocks; @@ -4308,9 +4309,6 @@ public: VPValue &getVF() { return VF; }; const VPValue &getVF() const { return VF; }; - /// Returns the symbolic UF of the vector loop region. - VPValue &getSymbolicUF() { return UF; }; - /// Returns VF * UF of the vector loop region. VPValue &getVFxUF() { return VFxUF; } @@ -4320,12 +4318,6 @@ public: void addVF(ElementCount VF) { VFs.insert(VF); } - /// Remove \p VF from the plan. - void removeVF(ElementCount VF) { - assert(hasVF(VF) && "tried to remove VF not present in plan"); - VFs.remove(VF); - } - void setVF(ElementCount VF) { assert(hasVF(VF) && "Cannot set VF not already in plan"); VFs.clear(); @@ -4427,15 +4419,6 @@ public: LLVM_DUMP_METHOD void dump() const; #endif - VPValue *getSCEVExpansion(const SCEV *S) const { - return SCEVToExpansion.lookup(S); - } - - void addSCEVExpansion(const SCEV *S, VPValue *V) { - assert(!SCEVToExpansion.contains(S) && "SCEV already expanded"); - SCEVToExpansion[S] = V; - } - /// Clone the current VPlan, update all VPValues of the new VPlan and cloned /// recipes to refer to the clones, and return it. 
VPlan *duplicate(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1f1b42b..931a5b7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -168,6 +168,7 @@ bool VPRecipeBase::mayHaveSideEffects() const { return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects(); case VPBlendSC: case VPReductionEVLSC: + case VPPartialReductionSC: case VPReductionSC: case VPScalarIVStepsSC: case VPVectorPointerSC: @@ -300,14 +301,23 @@ InstructionCost VPPartialReductionRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { std::optional<unsigned> Opcode; - VPValue *Op = getOperand(0); - VPRecipeBase *OpR = Op->getDefiningRecipe(); - - // If the partial reduction is predicated, a select will be operand 0 - if (match(getOperand(1), m_Select(m_VPValue(), m_VPValue(Op), m_VPValue()))) { - OpR = Op->getDefiningRecipe(); + VPValue *Op = getVecOp(); + uint64_t MulConst; + // If the partial reduction is predicated, a select will be operand 1. + // If it isn't predicated and the mul isn't operating on a constant, then it + // should have been turned into a VPExpressionRecipe. + // FIXME: Replace the entire function with this once all partial reduction + // variants are bundled into VPExpressionRecipe. 
+ if (!match(Op, m_Select(m_VPValue(), m_VPValue(Op), m_VPValue())) && + !match(Op, m_Mul(m_VPValue(), m_ConstantInt(MulConst)))) { + auto *PhiType = Ctx.Types.inferScalarType(getChainOp()); + auto *InputType = Ctx.Types.inferScalarType(getVecOp()); + return Ctx.TTI.getPartialReductionCost(getOpcode(), InputType, InputType, + PhiType, VF, TTI::PR_None, + TTI::PR_None, {}, Ctx.CostKind); } + VPRecipeBase *OpR = Op->getDefiningRecipe(); Type *InputTypeA = nullptr, *InputTypeB = nullptr; TTI::PartialReductionExtendKind ExtAType = TTI::PR_None, ExtBType = TTI::PR_None; @@ -2856,11 +2866,19 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF, cast<VPReductionRecipe>(ExpressionRecipes.back())->getRecurrenceKind()); switch (ExpressionType) { case ExpressionTypes::ExtendedReduction: { - return Ctx.TTI.getExtendedReductionCost( - Opcode, - cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() == - Instruction::ZExt, - RedTy, SrcVecTy, std::nullopt, Ctx.CostKind); + unsigned Opcode = RecurrenceDescriptor::getOpcode( + cast<VPReductionRecipe>(ExpressionRecipes[1])->getRecurrenceKind()); + auto *ExtR = cast<VPWidenCastRecipe>(ExpressionRecipes[0]); + return isa<VPPartialReductionRecipe>(ExpressionRecipes.back()) + ? 
Ctx.TTI.getPartialReductionCost( + Opcode, Ctx.Types.inferScalarType(getOperand(0)), nullptr, + RedTy, VF, + TargetTransformInfo::getPartialReductionExtendKind( + ExtR->getOpcode()), + TargetTransformInfo::PR_None, std::nullopt, Ctx.CostKind) + : Ctx.TTI.getExtendedReductionCost( + Opcode, ExtR->getOpcode() == Instruction::ZExt, RedTy, + SrcVecTy, std::nullopt, Ctx.CostKind); } case ExpressionTypes::MulAccReduction: return Ctx.TTI.getMulAccReductionCost(false, Opcode, RedTy, SrcVecTy, @@ -2871,6 +2889,19 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF, Opcode = Instruction::Sub; [[fallthrough]]; case ExpressionTypes::ExtMulAccReduction: { + if (isa<VPPartialReductionRecipe>(ExpressionRecipes.back())) { + auto *Ext0R = cast<VPWidenCastRecipe>(ExpressionRecipes[0]); + auto *Ext1R = cast<VPWidenCastRecipe>(ExpressionRecipes[1]); + auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]); + return Ctx.TTI.getPartialReductionCost( + Opcode, Ctx.Types.inferScalarType(getOperand(0)), + Ctx.Types.inferScalarType(getOperand(1)), RedTy, VF, + TargetTransformInfo::getPartialReductionExtendKind( + Ext0R->getOpcode()), + TargetTransformInfo::getPartialReductionExtendKind( + Ext1R->getOpcode()), + Mul->getOpcode(), Ctx.CostKind); + } return Ctx.TTI.getMulAccReductionCost( cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() == Instruction::ZExt, @@ -2910,12 +2941,13 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent, O << " = "; auto *Red = cast<VPReductionRecipe>(ExpressionRecipes.back()); unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()); + bool IsPartialReduction = isa<VPPartialReductionRecipe>(Red); switch (ExpressionType) { case ExpressionTypes::ExtendedReduction: { getOperand(1)->printAsOperand(O, SlotTracker); - O << " +"; - O << " reduce." << Instruction::getOpcodeName(Opcode) << " ("; + O << " + " << (IsPartialReduction ? "partial." 
: "") << "reduce."; + O << Instruction::getOpcodeName(Opcode) << " ("; getOperand(0)->printAsOperand(O, SlotTracker); Red->printFlags(O); @@ -2931,8 +2963,8 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent, } case ExpressionTypes::ExtNegatedMulAccReduction: { getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker); - O << " + reduce." - << Instruction::getOpcodeName( + O << " + " << (IsPartialReduction ? "partial." : "") << "reduce."; + O << Instruction::getOpcodeName( RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind())) << " (sub (0, mul"; auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]); @@ -2956,9 +2988,8 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent, case ExpressionTypes::MulAccReduction: case ExpressionTypes::ExtMulAccReduction: { getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker); - O << " + "; - O << "reduce." - << Instruction::getOpcodeName( + O << " + " << (IsPartialReduction ? "partial." : "") << "reduce."; + O << Instruction::getOpcodeName( RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind())) << " ("; O << "mul"; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 48cf763..84817d7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -943,12 +943,40 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) { } } +/// Get any instruction opcode or intrinsic ID data embedded in recipe \p R. +/// Returns an optional pair, where the first element indicates whether it is +/// an intrinsic ID. 
+static std::optional<std::pair<bool, unsigned>> +getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) { + return TypeSwitch<const VPSingleDefRecipe *, + std::optional<std::pair<bool, unsigned>>>(R) + .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe, + VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>( + [](auto *I) { return std::make_pair(false, I->getOpcode()); }) + .Case<VPWidenIntrinsicRecipe>([](auto *I) { + return std::make_pair(true, I->getVectorIntrinsicID()); + }) + .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) { + // For recipes that do not directly map to LLVM IR instructions, + // assign opcodes after the last VPInstruction opcode (which is also + // after the last IR Instruction opcode), based on the VPDefID. + return std::make_pair(false, + VPInstruction::OpsEnd + 1 + I->getVPDefID()); + }) + .Default([](auto *) { return std::nullopt; }); +} + /// Try to fold \p R using InstSimplifyFolder. Will succeed and return a -/// non-nullptr Value for a handled \p Opcode if corresponding \p Operands are -/// foldable live-ins. -static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode, - ArrayRef<VPValue *> Operands, - const DataLayout &DL, VPTypeAnalysis &TypeInfo) { +/// non-nullptr VPValue for a handled opcode or intrinsic ID if corresponding \p +/// Operands are foldable live-ins. 
+static VPValue *tryToFoldLiveIns(VPSingleDefRecipe &R, + ArrayRef<VPValue *> Operands, + const DataLayout &DL, + VPTypeAnalysis &TypeInfo) { + auto OpcodeOrIID = getOpcodeOrIntrinsicID(&R); + if (!OpcodeOrIID) + return nullptr; + SmallVector<Value *, 4> Ops; for (VPValue *Op : Operands) { if (!Op->isLiveIn() || !Op->getLiveInIRValue()) @@ -956,43 +984,57 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode, Ops.push_back(Op->getLiveInIRValue()); } - InstSimplifyFolder Folder(DL); - if (Instruction::isBinaryOp(Opcode)) - return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0], + auto FoldToIRValue = [&]() -> Value * { + InstSimplifyFolder Folder(DL); + if (OpcodeOrIID->first) { + if (R.getNumOperands() != 2) + return nullptr; + unsigned ID = OpcodeOrIID->second; + return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1], + TypeInfo.inferScalarType(&R)); + } + unsigned Opcode = OpcodeOrIID->second; + if (Instruction::isBinaryOp(Opcode)) + return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode), + Ops[0], Ops[1]); + if (Instruction::isCast(Opcode)) + return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0], + TypeInfo.inferScalarType(R.getVPSingleValue())); + switch (Opcode) { + case VPInstruction::LogicalAnd: + return Folder.FoldSelect(Ops[0], Ops[1], + ConstantInt::getNullValue(Ops[1]->getType())); + case VPInstruction::Not: + return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0], + Constant::getAllOnesValue(Ops[0]->getType())); + case Instruction::Select: + return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]); + case Instruction::ICmp: + case Instruction::FCmp: + return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0], Ops[1]); - if (Instruction::isCast(Opcode)) - return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0], - TypeInfo.inferScalarType(R.getVPSingleValue())); - switch (Opcode) { - case VPInstruction::LogicalAnd: - return Folder.FoldSelect(Ops[0], Ops[1], 
- ConstantInt::getNullValue(Ops[1]->getType())); - case VPInstruction::Not: - return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0], - Constant::getAllOnesValue(Ops[0]->getType())); - case Instruction::Select: - return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]); - case Instruction::ICmp: - case Instruction::FCmp: - return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0], - Ops[1]); - case Instruction::GetElementPtr: { - auto &RFlags = cast<VPRecipeWithIRFlags>(R); - auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr()); - return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], drop_begin(Ops), - RFlags.getGEPNoWrapFlags()); - } - case VPInstruction::PtrAdd: - case VPInstruction::WidePtrAdd: - return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0], - Ops[1], - cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags()); - // An extract of a live-in is an extract of a broadcast, so return the - // broadcasted element. - case Instruction::ExtractElement: - assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar"); - return Ops[0]; - } + case Instruction::GetElementPtr: { + auto &RFlags = cast<VPRecipeWithIRFlags>(R); + auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr()); + return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], + drop_begin(Ops), RFlags.getGEPNoWrapFlags()); + } + case VPInstruction::PtrAdd: + case VPInstruction::WidePtrAdd: + return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), + Ops[0], Ops[1], + cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags()); + // An extract of a live-in is an extract of a broadcast, so return the + // broadcasted element. 
+ case Instruction::ExtractElement: + assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar"); + return Ops[0]; + } + return nullptr; + }; + + if (Value *V = FoldToIRValue()) + return R.getParent()->getPlan()->getOrAddLiveIn(V); return nullptr; } @@ -1006,19 +1048,10 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { // Simplification of live-in IR values for SingleDef recipes using // InstSimplifyFolder. - if (TypeSwitch<VPRecipeBase *, bool>(&R) - .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe, - VPReplicateRecipe, VPWidenSelectRecipe>([&](auto *I) { - const DataLayout &DL = - Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout(); - Value *V = tryToFoldLiveIns(*I, I->getOpcode(), I->operands(), DL, - TypeInfo); - if (V) - I->replaceAllUsesWith(Plan->getOrAddLiveIn(V)); - return V; - }) - .Default([](auto *) { return false; })) - return; + const DataLayout &DL = + Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout(); + if (VPValue *V = tryToFoldLiveIns(*Def, Def->operands(), DL, TypeInfo)) + return Def->replaceAllUsesWith(V); // Fold PredPHI LiveIn -> LiveIn. if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) { @@ -1996,29 +2029,6 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> { return Def == getEmptyKey() || Def == getTombstoneKey(); } - /// Get any instruction opcode or intrinsic ID data embedded in recipe \p R. - /// Returns an optional pair, where the first element indicates whether it is - /// an intrinsic ID. 
- static std::optional<std::pair<bool, unsigned>> - getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) { - return TypeSwitch<const VPSingleDefRecipe *, - std::optional<std::pair<bool, unsigned>>>(R) - .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe, - VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>( - [](auto *I) { return std::make_pair(false, I->getOpcode()); }) - .Case<VPWidenIntrinsicRecipe>([](auto *I) { - return std::make_pair(true, I->getVectorIntrinsicID()); - }) - .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) { - // For recipes that do not directly map to LLVM IR instructions, - // assign opcodes after the last VPInstruction opcode (which is also - // after the last IR Instruction opcode), based on the VPDefID. - return std::make_pair(false, - VPInstruction::OpsEnd + 1 + I->getVPDefID()); - }) - .Default([](auto *) { return std::nullopt; }); - } - /// If recipe \p R will lower to a GEP with a non-i8 source element type, /// return that source element type. static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) { @@ -3519,18 +3529,31 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx, VPValue *VecOp = Red->getVecOp(); // Clamp the range if using extended-reduction is profitable. 
- auto IsExtendedRedValidAndClampRange = [&](unsigned Opcode, bool isZExt, - Type *SrcTy) -> bool { + auto IsExtendedRedValidAndClampRange = + [&](unsigned Opcode, Instruction::CastOps ExtOpc, Type *SrcTy) -> bool { return LoopVectorizationPlanner::getDecisionAndClampRange( [&](ElementCount VF) { auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF)); TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; - InstructionCost ExtRedCost = Ctx.TTI.getExtendedReductionCost( - Opcode, isZExt, RedTy, SrcVecTy, Red->getFastMathFlags(), - CostKind); + + InstructionCost ExtRedCost; InstructionCost ExtCost = cast<VPWidenCastRecipe>(VecOp)->computeCost(VF, Ctx); InstructionCost RedCost = Red->computeCost(VF, Ctx); + + if (isa<VPPartialReductionRecipe>(Red)) { + TargetTransformInfo::PartialReductionExtendKind ExtKind = + TargetTransformInfo::getPartialReductionExtendKind(ExtOpc); + // FIXME: Move partial reduction creation, costing and clamping + // here from LoopVectorize.cpp. + ExtRedCost = Ctx.TTI.getPartialReductionCost( + Opcode, SrcTy, nullptr, RedTy, VF, ExtKind, + llvm::TargetTransformInfo::PR_None, std::nullopt, Ctx.CostKind); + } else { + ExtRedCost = Ctx.TTI.getExtendedReductionCost( + Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy, + Red->getFastMathFlags(), CostKind); + } return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost; }, Range); @@ -3541,8 +3564,7 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx, if (match(VecOp, m_ZExtOrSExt(m_VPValue(A))) && IsExtendedRedValidAndClampRange( RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()), - cast<VPWidenCastRecipe>(VecOp)->getOpcode() == - Instruction::CastOps::ZExt, + cast<VPWidenCastRecipe>(VecOp)->getOpcode(), Ctx.Types.inferScalarType(A))) return new VPExpressionRecipe(cast<VPWidenCastRecipe>(VecOp), Red); @@ -3560,6 +3582,8 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx, static VPExpressionRecipe * 
tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, VPCostContext &Ctx, VFRange &Range) { + bool IsPartialReduction = isa<VPPartialReductionRecipe>(Red); + unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()); if (Opcode != Instruction::Add && Opcode != Instruction::Sub) return nullptr; @@ -3568,16 +3592,41 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, // Clamp the range if using multiply-accumulate-reduction is profitable. auto IsMulAccValidAndClampRange = - [&](bool isZExt, VPWidenRecipe *Mul, VPWidenCastRecipe *Ext0, - VPWidenCastRecipe *Ext1, VPWidenCastRecipe *OuterExt) -> bool { + [&](VPWidenRecipe *Mul, VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, + VPWidenCastRecipe *OuterExt) -> bool { return LoopVectorizationPlanner::getDecisionAndClampRange( [&](ElementCount VF) { TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; Type *SrcTy = Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy; - auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF)); - InstructionCost MulAccCost = Ctx.TTI.getMulAccReductionCost( - isZExt, Opcode, RedTy, SrcVecTy, CostKind); + InstructionCost MulAccCost; + + if (IsPartialReduction) { + Type *SrcTy2 = + Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr; + // FIXME: Move partial reduction creation, costing and clamping + // here from LoopVectorize.cpp. + MulAccCost = Ctx.TTI.getPartialReductionCost( + Opcode, SrcTy, SrcTy2, RedTy, VF, + Ext0 ? TargetTransformInfo::getPartialReductionExtendKind( + Ext0->getOpcode()) + : TargetTransformInfo::PR_None, + Ext1 ? TargetTransformInfo::getPartialReductionExtendKind( + Ext1->getOpcode()) + : TargetTransformInfo::PR_None, + Mul->getOpcode(), CostKind); + } else { + // Only partial reductions support mixed extends at the moment. 
+ if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode()) + return false; + + bool IsZExt = + !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt; + auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF)); + MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy, + SrcVecTy, CostKind); + } + InstructionCost MulCost = Mul->computeCost(VF, Ctx); InstructionCost RedCost = Red->computeCost(VF, Ctx); InstructionCost ExtCost = 0; @@ -3611,14 +3660,10 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe()); auto *Mul = cast<VPWidenRecipe>(VecOp->getDefiningRecipe()); - // Match reduce.add(mul(ext, ext)). - if (RecipeA && RecipeB && - (RecipeA->getOpcode() == RecipeB->getOpcode() || A == B) && - match(RecipeA, m_ZExtOrSExt(m_VPValue())) && + // Match reduce.add/sub(mul(ext, ext)). + if (RecipeA && RecipeB && match(RecipeA, m_ZExtOrSExt(m_VPValue())) && match(RecipeB, m_ZExtOrSExt(m_VPValue())) && - IsMulAccValidAndClampRange(RecipeA->getOpcode() == - Instruction::CastOps::ZExt, - Mul, RecipeA, RecipeB, nullptr)) { + IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) { if (Sub) return new VPExpressionRecipe(RecipeA, RecipeB, Mul, cast<VPWidenRecipe>(Sub), Red); @@ -3626,8 +3671,7 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, } // Match reduce.add(mul). 
// TODO: Add an expression type for this variant with a negated mul - if (!Sub && - IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr)) + if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr)) return new VPExpressionRecipe(Mul, Red); } // TODO: Add an expression type for negated versions of other expression @@ -3647,9 +3691,7 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, cast<VPWidenCastRecipe>(Mul->getOperand(1)->getDefiningRecipe()); if ((Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) && Ext0->getOpcode() == Ext1->getOpcode() && - IsMulAccValidAndClampRange(Ext0->getOpcode() == - Instruction::CastOps::ZExt, - Mul, Ext0, Ext1, Ext)) { + IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) { auto *NewExt0 = new VPWidenCastRecipe( Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0, *Ext0, Ext0->getDebugLoc()); @@ -3956,9 +3998,6 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH, // used. // TODO: Assert that they aren't used. - VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF())); - Plan.getSymbolicUF().replaceAllUsesWith(UF); - // If there are no users of the runtime VF, compute VFxUF by constant folding // the multiplication of VF and UF. if (VF.getNumUsers() == 0) { @@ -3978,6 +4017,7 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH, } VF.replaceAllUsesWith(RuntimeVF); + VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF())); VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF}); VFxUF.replaceAllUsesWith(MulByUF); } @@ -4045,14 +4085,14 @@ static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx, return false; } -/// Returns VF from \p VFs if \p IR is a full interleave group with factor and -/// number of members both equal to VF. The interleave group must also access -/// the full vector width. 
-static std::optional<ElementCount> isConsecutiveInterleaveGroup( - VPInterleaveRecipe *InterleaveR, ArrayRef<ElementCount> VFs, - VPTypeAnalysis &TypeInfo, const TargetTransformInfo &TTI) { +/// Returns true if \p IR is a full interleave group with factor and number of +/// members both equal to \p VF. The interleave group must also access the full +/// vector width \p VectorRegWidth. +static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR, + unsigned VF, VPTypeAnalysis &TypeInfo, + unsigned VectorRegWidth) { if (!InterleaveR || InterleaveR->getMask()) - return std::nullopt; + return false; Type *GroupElementTy = nullptr; if (InterleaveR->getStoredValues().empty()) { @@ -4061,7 +4101,7 @@ static std::optional<ElementCount> isConsecutiveInterleaveGroup( [&TypeInfo, GroupElementTy](VPValue *Op) { return TypeInfo.inferScalarType(Op) == GroupElementTy; })) - return std::nullopt; + return false; } else { GroupElementTy = TypeInfo.inferScalarType(InterleaveR->getStoredValues()[0]); @@ -4069,27 +4109,13 @@ static std::optional<ElementCount> isConsecutiveInterleaveGroup( [&TypeInfo, GroupElementTy](VPValue *Op) { return TypeInfo.inferScalarType(Op) == GroupElementTy; })) - return std::nullopt; + return false; } - auto GetVectorWidthForVF = [&TTI](ElementCount VF) { - TypeSize Size = TTI.getRegisterBitWidth( - VF.isFixed() ? 
TargetTransformInfo::RGK_FixedWidthVector - : TargetTransformInfo::RGK_ScalableVector); - assert(Size.isScalable() == VF.isScalable() && - "if Size is scalable, VF must to and vice versa"); - return Size.getKnownMinValue(); - }; - - for (ElementCount VF : VFs) { - unsigned MinVal = VF.getKnownMinValue(); - unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * MinVal; - auto IG = InterleaveR->getInterleaveGroup(); - if (IG->getFactor() == MinVal && IG->getNumMembers() == MinVal && - GroupSize == GetVectorWidthForVF(VF)) - return {VF}; - } - return std::nullopt; + unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * VF; + auto IG = InterleaveR->getInterleaveGroup(); + return IG->getFactor() == VF && IG->getNumMembers() == VF && + GroupSize == VectorRegWidth; } /// Returns true if \p VPValue is a narrow VPValue. @@ -4100,18 +4126,16 @@ static bool isAlreadyNarrow(VPValue *VPV) { return RepR && RepR->isSingleScalar(); } -std::unique_ptr<VPlan> -VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, - const TargetTransformInfo &TTI) { - using namespace llvm::VPlanPatternMatch; +void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, + unsigned VectorRegWidth) { VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion(); - - if (!VectorLoop) - return nullptr; + if (!VectorLoop || VectorLoop->getEntry()->getNumSuccessors() != 0) + return; VPTypeAnalysis TypeInfo(Plan); + + unsigned VFMinVal = VF.getKnownMinValue(); SmallVector<VPInterleaveRecipe *> StoreGroups; - std::optional<ElementCount> VFToOptimize; for (auto &R : *VectorLoop->getEntryBasicBlock()) { if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount())) continue; @@ -4125,33 +4149,30 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, // * recipes writing to memory except interleave groups // Only support plans with a canonical induction phi. 
if (R.isPhi()) - return nullptr; + return; auto *InterleaveR = dyn_cast<VPInterleaveRecipe>(&R); if (R.mayWriteToMemory() && !InterleaveR) - return nullptr; + return; + + // Do not narrow interleave groups if there are VectorPointer recipes and + // the plan was unrolled. The recipe implicitly uses VF from + // VPTransformState. + // TODO: Remove restriction once the VF for the VectorPointer offset is + // modeled explicitly as operand. + if (isa<VPVectorPointerRecipe>(&R) && Plan.getUF() > 1) + return; // All other ops are allowed, but we reject uses that cannot be converted // when checking all allowed consumers (store interleave groups) below. if (!InterleaveR) continue; - // Try to find a single VF, where all interleave groups are consecutive and - // saturate the full vector width. If we already have a candidate VF, check - // if it is applicable for the current InterleaveR, otherwise look for a - // suitable VF across the Plans VFs. - // - if (VFToOptimize) { - if (!isConsecutiveInterleaveGroup(InterleaveR, {*VFToOptimize}, TypeInfo, - TTI)) - return nullptr; - } else { - if (auto VF = isConsecutiveInterleaveGroup( - InterleaveR, to_vector(Plan.vectorFactors()), TypeInfo, TTI)) - VFToOptimize = *VF; - else - return nullptr; - } + // Bail out on non-consecutive interleave groups. + if (!isConsecutiveInterleaveGroup(InterleaveR, VFMinVal, TypeInfo, + VectorRegWidth)) + return; + // Skip read interleave groups. 
if (InterleaveR->getStoredValues().empty()) continue; @@ -4185,34 +4206,24 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>( InterleaveR->getStoredValues()[0]->getDefiningRecipe()); if (!WideMember0) - return nullptr; + return; for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) { auto *R = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe()); if (!R || R->getOpcode() != WideMember0->getOpcode() || R->getNumOperands() > 2) - return nullptr; + return; if (any_of(enumerate(R->operands()), [WideMember0, Idx = I](const auto &P) { const auto &[OpIdx, OpV] = P; return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx); })) - return nullptr; + return; } StoreGroups.push_back(InterleaveR); } if (StoreGroups.empty()) - return nullptr; - - // All interleave groups in Plan can be narrowed for VFToOptimize. Split the - // original Plan into 2: a) a new clone which contains all VFs of Plan, except - // VFToOptimize, and b) the original Plan with VFToOptimize as single VF. - std::unique_ptr<VPlan> NewPlan; - if (size(Plan.vectorFactors()) != 1) { - NewPlan = std::unique_ptr<VPlan>(Plan.duplicate()); - Plan.setVF(*VFToOptimize); - NewPlan->removeVF(*VFToOptimize); - } + return; // Convert InterleaveGroup \p R to a single VPWidenLoadRecipe. 
SmallPtrSet<VPValue *, 4> NarrowedOps; @@ -4283,8 +4294,9 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue()); VPBuilder PHBuilder(Plan.getVectorPreheader()); - VPValue *UF = &Plan.getSymbolicUF(); - if (VFToOptimize->isScalable()) { + VPValue *UF = Plan.getOrAddLiveIn( + ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF())); + if (VF.isScalable()) { VPValue *VScale = PHBuilder.createElementCount( CanIV->getScalarType(), ElementCount::getScalable(1)); VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF}); @@ -4296,10 +4308,6 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1))); } removeDeadRecipes(Plan); - assert(none_of(*VectorLoop->getEntryBasicBlock(), - IsaPred<VPVectorPointerRecipe>) && - "All VPVectorPointerRecipes should have been removed"); - return NewPlan; } /// Add branch weight metadata, if the \p Plan's middle block is terminated by a diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index ca8d956..b28559b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -341,20 +341,14 @@ struct VPlanTransforms { static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan, ScalarEvolution &SE); - /// Try to find a single VF among \p Plan's VFs for which all interleave - /// groups (with known minimum VF elements) can be replaced by wide loads and - /// stores processing VF elements, if all transformed interleave groups access - /// the full vector width (checked via the maximum vector register width). If - /// the transformation can be applied, the original \p Plan will be split in - /// 2: - /// 1. The original Plan with the single VF containing the optimized recipes - /// using wide loads instead of interleave groups. - /// 2. 
A new clone which contains all VFs of Plan except the optimized VF. - /// - /// This effectively is a very simple form of loop-aware SLP, where we use - /// interleave groups to identify candidates. - static std::unique_ptr<VPlan> - narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI); + /// Try to convert a plan with interleave groups with VF elements to a plan + /// with the interleave groups replaced by wide loads and stores processing VF + /// elements, if all transformed interleave groups access the full vector + /// width (checked via \p VectorRegWidth). This effectively is a very simple + /// form of loop-aware SLP, where we use interleave groups to identify + /// candidates. + static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF, + unsigned VectorRegWidth); /// Predicate and linearize the control-flow in the only loop region of /// \p Plan. If \p FoldTail is true, create a mask guarding the loop diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 32e4b88..fe66f13 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -32,8 +32,6 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) { } VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { - if (auto *Expanded = Plan.getSCEVExpansion(Expr)) - return Expanded; VPValue *Expanded = nullptr; if (auto *E = dyn_cast<SCEVConstant>(Expr)) Expanded = Plan.getOrAddLiveIn(E->getValue()); @@ -50,7 +48,6 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe()); } } - Plan.addSCEVExpansion(Expr, Expanded); return Expanded; } @@ -151,6 +148,8 @@ unsigned vputils::getVFScaleFactor(VPRecipeBase *R) { return RR->getVFScaleFactor(); if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R)) return RR->getVFScaleFactor(); + if (auto *ER = dyn_cast<VPExpressionRecipe>(R)) + 
return ER->getVFScaleFactor(); assert( (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() != VPInstruction::ReductionStartVector) && |
