Diffstat (limited to 'llvm/lib/Target/AArch64')
13 files changed, 265 insertions, 146 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index e8d3161..ad8368e 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -597,6 +597,14 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
   return Thunk;
 }
 
+std::optional<std::string> getArm64ECMangledFunctionName(GlobalValue &GV) {
+  if (!GV.hasName()) {
+    GV.setName("__unnamed");
+  }
+
+  return llvm::getArm64ECMangledFunctionName(GV.getName());
+}
+
 // Builds the "guest exit thunk", a helper to call a function which may or may
 // not be an exit thunk. (We optimistically assume non-dllimport function
 // declarations refer to functions defined in AArch64 code; if the linker
@@ -608,7 +616,7 @@ Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
   getThunkType(F->getFunctionType(), F->getAttributes(),
                Arm64ECThunkType::GuestExit, NullThunkName, Arm64Ty, X64Ty,
                ArgTranslations);
-  auto MangledName = getArm64ECMangledFunctionName(F->getName().str());
+  auto MangledName = getArm64ECMangledFunctionName(*F);
   assert(MangledName && "Can't guest exit to function that's already native");
   std::string ThunkName = *MangledName;
   if (ThunkName[0] == '?' && ThunkName.find("@") != std::string::npos) {
@@ -727,9 +735,6 @@ AArch64Arm64ECCallLowering::buildPatchableThunk(GlobalAlias *UnmangledAlias,
 
 // Lower an indirect call with inline code.
 void AArch64Arm64ECCallLowering::lowerCall(CallBase *CB) {
-  assert(CB->getModule()->getTargetTriple().isOSWindows() &&
-         "Only applicable for Windows targets");
-
   IRBuilder<> B(CB);
   Value *CalledOperand = CB->getCalledOperand();
 
@@ -790,7 +795,7 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
     if (!F)
       continue;
     if (std::optional<std::string> MangledName =
-            getArm64ECMangledFunctionName(A.getName().str())) {
+            getArm64ECMangledFunctionName(A)) {
       F->addMetadata("arm64ec_unmangled_name",
                      *MDNode::get(M->getContext(),
                                   MDString::get(M->getContext(), A.getName())));
@@ -807,7 +812,7 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
           cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
       if (PersFn->getValueType() && PersFn->getValueType()->isFunctionTy()) {
         if (std::optional<std::string> MangledName =
-                getArm64ECMangledFunctionName(PersFn->getName().str())) {
+                getArm64ECMangledFunctionName(*PersFn)) {
           PersFn->setName(MangledName.value());
         }
       }
@@ -821,7 +826,7 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
     // Rename hybrid patchable functions and change callers to use a global
     // alias instead.
     if (std::optional<std::string> MangledName =
-            getArm64ECMangledFunctionName(F.getName().str())) {
+            getArm64ECMangledFunctionName(F)) {
       std::string OrigName(F.getName());
       F.setName(MangledName.value() + HybridPatchableTargetSuffix);
 
@@ -927,7 +932,7 @@ bool AArch64Arm64ECCallLowering::processFunction(
   // FIXME: Handle functions with weak linkage?
   if (!F.hasLocalLinkage() || F.hasAddressTaken()) {
     if (std::optional<std::string> MangledName =
-            getArm64ECMangledFunctionName(F.getName().str())) {
+            getArm64ECMangledFunctionName(F)) {
       F.addMetadata("arm64ec_unmangled_name",
                     *MDNode::get(M->getContext(),
                                  MDString::get(M->getContext(), F.getName())));
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2b6ea86..a40de86b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8537,7 +8537,7 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
     if (IsCalleeWin64) {
       UseVarArgCC = true;
     } else {
-      UseVarArgCC = !Outs[i].IsFixed;
+      UseVarArgCC = ArgFlags.isVarArg();
     }
   }
 
@@ -8982,7 +8982,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   unsigned NumArgs = Outs.size();
   for (unsigned i = 0; i != NumArgs; ++i) {
-    if (!Outs[i].IsFixed && Outs[i].VT.isScalableVector())
+    if (Outs[i].Flags.isVarArg() && Outs[i].VT.isScalableVector())
       report_fatal_error("Passing SVE types to variadic functions is "
                          "currently not supported");
   }
@@ -11390,13 +11390,18 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
   // select_cc lhs, rhs, sub(rhs, lhs), sub(lhs, rhs), cc ->
   // select_cc lhs, rhs, neg(sub(lhs, rhs)), sub(lhs, rhs), cc
   // The second forms can be matched into subs+cneg.
+  // NOTE: Drop poison generating flags from the negated operand to avoid
+  // inadvertently propagating poison after the canonicalisation.
   if (TVal.getOpcode() == ISD::SUB && FVal.getOpcode() == ISD::SUB) {
     if (TVal.getOperand(0) == LHS && TVal.getOperand(1) == RHS &&
-        FVal.getOperand(0) == RHS && FVal.getOperand(1) == LHS)
+        FVal.getOperand(0) == RHS && FVal.getOperand(1) == LHS) {
+      TVal->dropFlags(SDNodeFlags::PoisonGeneratingFlags);
       FVal = DAG.getNegative(TVal, DL, TVal.getValueType());
-    else if (TVal.getOperand(0) == RHS && TVal.getOperand(1) == LHS &&
-             FVal.getOperand(0) == LHS && FVal.getOperand(1) == RHS)
+    } else if (TVal.getOperand(0) == RHS && TVal.getOperand(1) == LHS &&
+               FVal.getOperand(0) == LHS && FVal.getOperand(1) == RHS) {
+      FVal->dropFlags(SDNodeFlags::PoisonGeneratingFlags);
       TVal = DAG.getNegative(FVal, DL, FVal.getValueType());
+    }
   }
 
   unsigned Opcode = AArch64ISD::CSEL;
@@ -13477,7 +13482,7 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
   // Look for the first non-undef element.
   const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
 
-  // Benefit form APInt to handle overflow when calculating expected element.
+  // Benefit from APInt to handle overflow when calculating expected element.
   unsigned NumElts = VT.getVectorNumElements();
   unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
   APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, /*isSigned=*/false,
@@ -13485,7 +13490,7 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
 
   // The following shuffle indices must be the successive elements after the
   // first real element.
   bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](int Elt) {
-    return Elt != ExpectedElt++ && Elt != -1;
+    return Elt != ExpectedElt++ && Elt >= 0;
   });
   if (FoundWrongElt)
     return false;
@@ -15772,6 +15777,7 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
           isREVMask(M, EltSize, NumElts, 32) ||
           isREVMask(M, EltSize, NumElts, 16) ||
           isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
+          isSingletonEXTMask(M, VT, DummyUnsigned) ||
           isTRNMask(M, NumElts, DummyUnsigned) ||
           isUZPMask(M, NumElts, DummyUnsigned) ||
           isZIPMask(M, NumElts, DummyUnsigned) ||
@@ -16284,9 +16290,8 @@ AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
     Chain = SP.getValue(1);
     SP = DAG.getNode(ISD::SUB, DL, MVT::i64, SP, Size);
     if (Align)
-      SP =
-          DAG.getNode(ISD::AND, DL, VT, SP.getValue(0),
-                      DAG.getSignedConstant(-(uint64_t)Align->value(), DL, VT));
+      SP = DAG.getNode(ISD::AND, DL, VT, SP.getValue(0),
+                       DAG.getSignedConstant(-Align->value(), DL, VT));
     Chain = DAG.getCopyToReg(Chain, DL, AArch64::SP, SP);
     SDValue Ops[2] = {SP, Chain};
     return DAG.getMergeValues(Ops, DL);
@@ -16323,7 +16328,7 @@ AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
   SP = DAG.getNode(ISD::SUB, DL, MVT::i64, SP, Size);
   if (Align)
     SP = DAG.getNode(ISD::AND, DL, VT, SP.getValue(0),
-                     DAG.getSignedConstant(-(uint64_t)Align->value(), DL, VT));
+                     DAG.getSignedConstant(-Align->value(), DL, VT));
   Chain = DAG.getCopyToReg(Chain, DL, AArch64::SP, SP);
 
   Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), DL);
@@ -16351,7 +16356,7 @@ AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(SDValue Op,
   SP = DAG.getNode(ISD::SUB, DL, MVT::i64, SP, Size);
   if (Align)
     SP = DAG.getNode(ISD::AND, DL, VT, SP.getValue(0),
-                     DAG.getSignedConstant(-(uint64_t)Align->value(), DL, VT));
+                     DAG.getSignedConstant(-Align->value(), DL, VT));
 
   // Set the real SP to the new value with a probing loop.
   Chain = DAG.getNode(AArch64ISD::PROBED_ALLOCA, DL, MVT::Other, Chain, SP);
@@ -25450,6 +25455,29 @@ static SDValue performCSELCombine(SDNode *N,
     }
   }
 
+  // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't
+  // use overflow flags, to avoid the comparison with zero. In case of success,
+  // this also replaces the original SUB(x,y) with the newly created SUBS(x,y).
+  // NOTE: Perhaps in the future use performFlagSettingCombine to replace SUB
+  // nodes with their SUBS equivalent as is already done for other flag-setting
+  // operators, in which case doing the replacement here becomes redundant.
+  if (Cond.getOpcode() == AArch64ISD::SUBS && Cond->hasNUsesOfValue(1, 1) &&
+      isNullConstant(Cond.getOperand(1))) {
+    SDValue Sub = Cond.getOperand(0);
+    AArch64CC::CondCode CC =
+        static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
+    if (Sub.getOpcode() == ISD::SUB &&
+        (CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI ||
+         CC == AArch64CC::PL)) {
+      SDLoc DL(N);
+      SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
+                                 Sub.getOperand(0), Sub.getOperand(1));
+      DCI.CombineTo(Sub.getNode(), Subs);
+      DCI.CombineTo(Cond.getNode(), Subs, Subs.getValue(1));
+      return SDValue(N, 0);
+    }
+  }
+
   // CSEL (LASTB P, Z), X, NE(ANY P) -> CLASTB P, X, Z
   if (SDValue CondLast = foldCSELofLASTB(N, DAG))
     return CondLast;
@@ -28609,14 +28637,16 @@ Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
 
 void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
   // MSVC CRT provides functionalities for stack protection.
-  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
+  RTLIB::LibcallImpl SecurityCheckCookieLibcall =
+      getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
+  if (SecurityCheckCookieLibcall != RTLIB::Unsupported) {
     // MSVC CRT has a global variable holding security cookie.
     M.getOrInsertGlobal("__security_cookie",
                         PointerType::getUnqual(M.getContext()));
 
     // MSVC CRT has a function to validate security cookie.
     FunctionCallee SecurityCheckCookie =
-        M.getOrInsertFunction(Subtarget->getSecurityCheckCookieName(),
+        M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall),
                               Type::getVoidTy(M.getContext()),
                               PointerType::getUnqual(M.getContext()));
     if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
@@ -28637,8 +28667,10 @@ Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
 
 Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
   // MSVC CRT has a function to validate security cookie.
-  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
-    return M.getFunction(Subtarget->getSecurityCheckCookieName());
+  RTLIB::LibcallImpl SecurityCheckCookieLibcall =
+      getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
+  if (SecurityCheckCookieLibcall != RTLIB::Unsupported)
+    return M.getFunction(getLibcallImplName(SecurityCheckCookieLibcall));
   return TargetLowering::getSSPStackGuardCheck(M);
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ea63edd8..8887657 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -887,6 +887,10 @@ private:
   bool shouldScalarizeBinop(SDValue VecOp) const override {
     return VecOp.getOpcode() == ISD::SETCC;
   }
+
+  bool hasMultipleConditionRegisters(EVT VT) const override {
+    return VT.isScalableVector();
+  }
 };
 
 namespace AArch64 {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index ba7cbcc..d068a12 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6484,7 +6484,9 @@ class BaseSIMDThreeSameVectorDot<bit Q, bit U, bits<2> sz, bits<4> opc, string a
                      (OpNode (AccumType RegType:$Rd),
                              (InputType RegType:$Rn),
                              (InputType RegType:$Rm)))]> {
-  let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+
+  let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 #
+                                  "|" # kind1 # "\t$Rd, $Rn, $Rm}");
 }
 
 multiclass SIMDThreeSameVectorDot<bit U, bit Mixed, string asm, SDPatternOperator OpNode> {
@@ -6507,7 +6509,8 @@ class BaseSIMDThreeSameVectorFML<bit Q, bit U, bit b13, bits<3> size, string asm
                      (OpNode (AccumType RegType:$Rd),
                              (InputType RegType:$Rn),
                              (InputType RegType:$Rm)))]> {
-  let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+  let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 #
+                                  "|" # kind1 # "\t$Rd, $Rn, $Rm}");
   let Inst{13} = b13;
 }
 
@@ -8986,7 +8989,8 @@ class BaseSIMDThreeSameVectorBFDot<bit Q, bit U, string asm, string kind1,
                                       (InputType RegType:$Rm)))]> {
   let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 #
-                                  ", $Rm" # kind2 # "}");
+                                  ", $Rm" # kind2 #
+                                  "|" # kind1 # "\t$Rd, $Rn, $Rm}");
 }
 
 multiclass SIMDThreeSameVectorBFDot<bit U, string asm> {
@@ -9032,7 +9036,7 @@ class SIMDBF16MLAL<bit Q, string asm, SDPatternOperator OpNode>
                     [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
                                                      (v8bf16 V128:$Rn),
                                                      (v8bf16 V128:$Rm)))]> {
-  let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h}");
+  let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h|.4s\t$Rd, $Rn, $Rm}");
 }
 
 let mayRaiseFPException = 1, Uses = [FPCR] in
@@ -9071,8 +9075,7 @@ class SIMDThreeSameVectorBF16MatrixMul<string asm>
                                                 (int_aarch64_neon_bfmmla
                                                   (v4f32 V128:$Rd),
                                                   (v8bf16 V128:$Rn),
                                                   (v8bf16 V128:$Rm)))]> {
-  let AsmString = !strconcat(asm, "{\t$Rd", ".4s", ", $Rn", ".8h",
-                                  ", $Rm", ".8h", "}");
+  let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h|.4s\t$Rd, $Rn, $Rm}");
 }
 
 let mayRaiseFPException = 1, Uses = [FPCR] in
@@ -9143,7 +9146,7 @@ class SIMDThreeSameVectorMatMul<bit B, bit U, string asm, SDPatternOperator OpNo
                                   [(set (v4i32 V128:$dst),
                                      (OpNode (v4i32 V128:$Rd),
                                              (v16i8 V128:$Rn),
                                              (v16i8 V128:$Rm)))]> {
-  let AsmString = asm # "{\t$Rd.4s, $Rn.16b, $Rm.16b}";
+  let AsmString = asm # "{\t$Rd.4s, $Rn.16b, $Rm.16b|.4s\t$Rd, $Rn, $Rm}";
 }
 
 //----------------------------------------------------------------------------
@@ -12561,7 +12564,7 @@ multiclass STOPregister<string asm, string instr> {
 let Predicates = [HasLSUI] in
 class BaseSTOPregisterLSUI<string asm, RegisterClass OP, Register Reg,
                            Instruction inst> :
-    InstAlias<asm # "\t$Rs, [$Rn]", (inst Reg, OP:$Rs, GPR64sp:$Rn), 0>;
+    InstAlias<asm # "\t$Rs, [$Rn]", (inst Reg, OP:$Rs, GPR64sp:$Rn)>;
 
 multiclass STOPregisterLSUI<string asm, string instr> {
   def : BaseSTOPregisterLSUI<asm # "l", GPR32, WZR,
@@ -13344,8 +13347,8 @@ multiclass AtomicFPStore<bit R, bits<3> op0, string asm> {
 class BaseSIMDThreeSameVectorFP8MatrixMul<string asm, bits<2> size, string kind>
     : BaseSIMDThreeSameVectorTied<1, 1, {size, 0}, 0b11101, V128, asm, ".16b", []> {
-  let AsmString = !strconcat(asm, "{\t$Rd", kind, ", $Rn", ".16b",
-                                  ", $Rm", ".16b", "}");
+  let AsmString = !strconcat(asm, "{\t$Rd", kind, ", $Rn.16b, $Rm.16b",
+                                  "|", kind, "\t$Rd, $Rn, $Rm}");
 }
 
 multiclass SIMDThreeSameVectorFP8MatrixMul<string asm>{
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 59d4fd2..fb59c9f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5861,33 +5861,41 @@ void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
   }
 }
 
-// Convenience function to create a DWARF expression for
-// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
-static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
-                                     int NumVGScaledBytes, unsigned VG,
-                                     llvm::raw_string_ostream &Comment) {
-  uint8_t buffer[16];
-
-  if (NumBytes) {
+// Convenience function to create a DWARF expression for: Constant `Operation`.
+// This helper emits compact sequences for common cases. For example, for`-15
+// DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
+static void appendConstantExpr(SmallVectorImpl<char> &Expr, int64_t Constant,
+                               dwarf::LocationAtom Operation) {
+  if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
+    // -Constant (1 to 31)
+    Expr.push_back(dwarf::DW_OP_lit0 - Constant);
+    Operation = dwarf::DW_OP_minus;
+  } else if (Constant >= 0 && Constant <= 31) {
+    // Literal value 0 to 31
+    Expr.push_back(dwarf::DW_OP_lit0 + Constant);
+  } else {
+    // Signed constant
     Expr.push_back(dwarf::DW_OP_consts);
-    Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
-    Expr.push_back((uint8_t)dwarf::DW_OP_plus);
-    Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
+    appendLEB128<LEB128Sign::Signed>(Expr, Constant);
   }
+  return Expr.push_back(Operation);
+}
 
-  if (NumVGScaledBytes) {
-    Expr.push_back((uint8_t)dwarf::DW_OP_consts);
-    Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
-
-    Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
-    Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
-    Expr.push_back(0);
-
-    Expr.push_back((uint8_t)dwarf::DW_OP_mul);
-    Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+// Convenience function to create a DWARF expression for a register.
+static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
+  Expr.push_back((char)dwarf::DW_OP_bregx);
+  appendLEB128<LEB128Sign::Unsigned>(Expr, RegNum);
+  Expr.push_back(0);
+}
 
-    Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
-            << std::abs(NumVGScaledBytes) << " * VG";
+// Convenience function to create a comment for
+// (+/-) NumBytes (* RegScale)?
+static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
+                                StringRef RegScale = {}) {
+  if (NumBytes) {
+    Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
+    if (!RegScale.empty())
+      Comment << ' ' << RegScale;
   }
 }
 
@@ -5909,19 +5917,26 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
   else
     Comment << printReg(Reg, &TRI);
 
-  // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
+  // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
   SmallString<64> Expr;
   unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
-  Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
-  Expr.push_back(0);
-  appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
-                           TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+  assert(DwarfReg >= 0 && DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
+  // Reg + NumBytes
+  Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
+  appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
+  appendOffsetComment(NumBytes, Comment);
+  if (NumVGScaledBytes) {
+    // + VG * NumVGScaledBytes
+    appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
+    appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
+    appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
+    Expr.push_back(dwarf::DW_OP_plus);
+  }
 
   // Wrap this into DW_CFA_def_cfa.
   SmallString<64> DefCfaExpr;
   DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
-  uint8_t buffer[16];
-  DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
+  appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
   DefCfaExpr.append(Expr.str());
 
   return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
                                         Comment.str());
@@ -5958,17 +5973,25 @@ MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
   llvm::raw_string_ostream Comment(CommentBuffer);
   Comment << printReg(Reg, &TRI) << " @ cfa";
 
-  // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
+  // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
+  assert(NumVGScaledBytes && "Expected scalable offset");
   SmallString<64> OffsetExpr;
-  appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
-                           TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+  // + VG * NumVGScaledBytes
+  appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
+  appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
+  appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
+  OffsetExpr.push_back(dwarf::DW_OP_plus);
+  if (NumBytes) {
+    // + NumBytes
+    appendOffsetComment(NumBytes, Comment);
+    appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
+  }
 
   // Wrap this into DW_CFA_expression
   SmallString<64> CfaExpr;
   CfaExpr.push_back(dwarf::DW_CFA_expression);
-  uint8_t buffer[16];
-  CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
-  CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
+  appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
+  appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
   CfaExpr.append(OffsetExpr.str());
 
   return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index adc984a..1bc1d98 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -22,7 +22,8 @@ def TuneA320 : SubtargetFeature<"a320", "ARMProcFamily", "CortexA320",
                                   FeatureFuseAES,
                                   FeatureFuseAdrpAdd,
                                   FeaturePostRAScheduler,
-                                  FeatureUseWzrToVecMove]>;
+                                  FeatureUseWzrToVecMove,
+                                  FeatureUseFixedOverScalableIfEqualCost]>;
 
 def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
                                "Cortex-A53 ARM processors", [
@@ -45,7 +46,8 @@ def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510",
                                  FeatureFuseAES,
                                  FeatureFuseAdrpAdd,
                                  FeaturePostRAScheduler,
-                                 FeatureUseWzrToVecMove
+                                 FeatureUseWzrToVecMove,
+                                 FeatureUseFixedOverScalableIfEqualCost
                                  ]>;
 
 def TuneA520 : SubtargetFeature<"a520", "ARMProcFamily", "CortexA520",
@@ -53,7 +55,8 @@ def TuneA520 : SubtargetFeature<"a520", "ARMProcFamily", "CortexA520",
                                  FeatureFuseAES,
                                  FeatureFuseAdrpAdd,
                                  FeaturePostRAScheduler,
-                                 FeatureUseWzrToVecMove]>;
+                                 FeatureUseWzrToVecMove,
+                                 FeatureUseFixedOverScalableIfEqualCost]>;
 
 def TuneA520AE : SubtargetFeature<"a520ae", "ARMProcFamily", "CortexA520",
                                  "Cortex-A520AE ARM processors", [
@@ -756,7 +759,6 @@ def ProcessorFeatures {
                                  FeatureSB, FeaturePAuth, FeatureSSBS, FeatureSVE, FeatureSVE2,
                                  FeatureComplxNum, FeatureCRC, FeatureDotProd,
                                  FeatureFPARMv8,FeatureFullFP16, FeatureJS, FeatureLSE,
-                                 FeatureUseFixedOverScalableIfEqualCost,
                                  FeatureRAS, FeatureRCPC, FeatureRDM, FeatureFPAC];
  list<SubtargetFeature> A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM,
                                 FeatureMTE, FeatureETE, FeatureSVEBitPerm,
@@ -766,7 +768,6 @@ def ProcessorFeatures {
                                 FeatureSVE, FeatureSVE2,
                                 FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureFPARMv8,
                                 FeatureFullFP16, FeatureMatMulInt8, FeatureJS, FeatureNEON,
                                 FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM,
-                                FeatureUseFixedOverScalableIfEqualCost,
                                 FeatureDotProd, FeatureFPAC];
  list<SubtargetFeature> A520AE = [HasV9_2aOps, FeaturePerfMon, FeatureAM,
                                   FeatureMTE, FeatureETE, FeatureSVEBitPerm,
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 061ed61..d00e447 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -451,12 +451,6 @@ public:
     return "__chkstk";
   }
 
-  const char* getSecurityCheckCookieName() const {
-    if (isWindowsArm64EC())
-      return "#__security_check_cookie_arm64ec";
-    return "__security_check_cookie";
-  }
-
   /// Choose a method of checking LR before performing a tail call.
   AArch64PAuth::AuthCheckMethod
   getAuthenticatedLRCheckMethod(const MachineFunction &MF) const;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index e1adc0b..9f05add 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3092,6 +3092,13 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
     return AdjustCost(
         BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
 
+  // For the moment we do not have lowering for SVE1-only fptrunc f64->bf16 as
+  // we use fcvtx under SVE2. Give them invalid costs.
+  if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
+      ISD == ISD::FP_ROUND && SrcTy.isScalableVector() &&
+      DstTy.getScalarType() == MVT::bf16 && SrcTy.getScalarType() == MVT::f64)
+    return InstructionCost::getInvalid();
+
   static const TypeConversionCostTblEntry BF16Tbl[] = {
       {ISD::FP_ROUND, MVT::bf16, MVT::f32, 1},   // bfcvt
       {ISD::FP_ROUND, MVT::bf16, MVT::f64, 1},   // bfcvt
@@ -3100,6 +3107,12 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
       {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2},   // bfcvtn+fcvtn
       {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3},   // fcvtn+fcvtl2+bfcvtn
       {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6},   // 2 * fcvtn+fcvtn2+bfcvtn
+      {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 1},  // bfcvt
+      {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 1},  // bfcvt
+      {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 3},  // bfcvt+bfcvt+uzp1
+      {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 2},  // fcvtx+bfcvt
+      {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 5},  // 2*fcvtx+2*bfcvt+uzp1
+      {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 11}, // 4*fcvt+4*bfcvt+3*uzp
   };
 
   if (ST->hasBF16())
@@ -3508,11 +3521,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
       {ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f32, 1},
       {ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f32, 3},
 
+      // Truncate from nxvmf32 to nxvmbf16.
+      {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 8},
+      {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 8},
+      {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 17},
+
       // Truncate from nxvmf64 to nxvmf16.
       {ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f64, 1},
       {ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f64, 3},
       {ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f64, 7},
 
+      // Truncate from nxvmf64 to nxvmbf16.
+      {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 9},
+      {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 19},
+      {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 39},
+
       // Truncate from nxvmf64 to nxvmf32.
      {ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1},
      {ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 3},
@@ -3523,11 +3546,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
      {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
      {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
 
+      // Extend from nxvmbf16 to nxvmf32.
+      {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1}, // lsl
+      {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1}, // lsl
+      {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4}, // unpck+unpck+lsl+lsl
+
      // Extend from nxvmf16 to nxvmf64.
      {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
      {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
      {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
 
+      // Extend from nxvmbf16 to nxvmf64.
+      {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2},  // lsl+fcvt
+      {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6},  // 2*unpck+2*lsl+2*fcvt
+      {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14}, // 6*unpck+4*lsl+4*fcvt
+
      // Extend from nxvmf32 to nxvmf64.
      {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
      {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
@@ -4282,10 +4315,9 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
     unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
     TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
     TTI::OperandValueInfo Op2Info, const Instruction *I) const {
-  int ISD = TLI->InstructionOpcodeToISD(Opcode);
   // We don't lower some vector selects well that are wider than the register
   // width. TODO: Improve this with different cost kinds.
-  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
+  if (isa<FixedVectorType>(ValTy) && Opcode == Instruction::Select) {
     // We would need this many instructions to hide the scalarization happening.
     const int AmortizationCost = 20;
 
@@ -4315,55 +4347,72 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
       return LT.first;
     }
 
-    static const TypeConversionCostTblEntry
-    VectorSelectTbl[] = {
-      { ISD::SELECT, MVT::v2i1, MVT::v2f32, 2 },
-      { ISD::SELECT, MVT::v2i1, MVT::v2f64, 2 },
-      { ISD::SELECT, MVT::v4i1, MVT::v4f32, 2 },
-      { ISD::SELECT, MVT::v4i1, MVT::v4f16, 2 },
-      { ISD::SELECT, MVT::v8i1, MVT::v8f16, 2 },
-      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
-      { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
-      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
-      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
-      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
-      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
-    };
+    static const TypeConversionCostTblEntry VectorSelectTbl[] = {
+        {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
+        {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
+        {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
+        {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
+        {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
+        {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
+        {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
+        {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
+        {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
+        {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
+        {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
 
     EVT SelCondTy = TLI->getValueType(DL, CondTy);
     EVT SelValTy = TLI->getValueType(DL, ValTy);
     if (SelCondTy.isSimple() && SelValTy.isSimple()) {
-      if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
+      if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, Opcode,
                                                      SelCondTy.getSimpleVT(),
                                                      SelValTy.getSimpleVT()))
        return Entry->Cost;
    }
  }
 
-  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SETCC) {
-    Type *ValScalarTy = ValTy->getScalarType();
-    if ((ValScalarTy->isHalfTy() && !ST->hasFullFP16()) ||
-        ValScalarTy->isBFloatTy()) {
-      auto *ValVTy = cast<FixedVectorType>(ValTy);
-
-      // Without dedicated instructions we promote [b]f16 compares to f32.
-      auto *PromotedTy =
-          VectorType::get(Type::getFloatTy(ValTy->getContext()), ValVTy);
-
-      InstructionCost Cost = 0;
-      // Promote operands to float vectors.
-      Cost += 2 * getCastInstrCost(Instruction::FPExt, PromotedTy, ValTy,
-                                   TTI::CastContextHint::None, CostKind);
-      // Compare float vectors.
+  if (Opcode == Instruction::FCmp) {
+    // Without dedicated instructions we promote f16 + bf16 compares to f32.
+    if ((!ST->hasFullFP16() && ValTy->getScalarType()->isHalfTy()) ||
+        ValTy->getScalarType()->isBFloatTy()) {
+      Type *PromotedTy =
+          ValTy->getWithNewType(Type::getFloatTy(ValTy->getContext()));
+      InstructionCost Cost =
+          getCastInstrCost(Instruction::FPExt, PromotedTy, ValTy,
+                           TTI::CastContextHint::None, CostKind);
+      if (!Op1Info.isConstant() && !Op2Info.isConstant())
+        Cost *= 2;
       Cost += getCmpSelInstrCost(Opcode, PromotedTy, CondTy, VecPred, CostKind,
                                  Op1Info, Op2Info);
-      // During codegen we'll truncate the vector result from i32 to i16.
-      Cost +=
-          getCastInstrCost(Instruction::Trunc, VectorType::getInteger(ValVTy),
-                           VectorType::getInteger(PromotedTy),
-                           TTI::CastContextHint::None, CostKind);
+      if (ValTy->isVectorTy())
+        Cost += getCastInstrCost(
+            Instruction::Trunc, VectorType::getInteger(cast<VectorType>(ValTy)),
+            VectorType::getInteger(cast<VectorType>(PromotedTy)),
+            TTI::CastContextHint::None, CostKind);
      return Cost;
    }
+
+    auto LT = getTypeLegalizationCost(ValTy);
+    // Model unknown fp compares as a libcall.
+    if (LT.second.getScalarType() != MVT::f64 &&
+        LT.second.getScalarType() != MVT::f32 &&
+        LT.second.getScalarType() != MVT::f16)
+      return LT.first * getCallInstrCost(/*Function*/ nullptr, ValTy,
+                                         {ValTy, ValTy}, CostKind);
+
+    // Some comparison operators require expanding to multiple compares + or.
+    unsigned Factor = 1;
+    if (!CondTy->isVectorTy() &&
+        (VecPred == FCmpInst::FCMP_ONE || VecPred == FCmpInst::FCMP_UEQ))
+      Factor = 2; // fcmp with 2 selects
+    else if (isa<FixedVectorType>(ValTy) &&
+             (VecPred == FCmpInst::FCMP_ONE || VecPred == FCmpInst::FCMP_UEQ ||
+              VecPred == FCmpInst::FCMP_ORD || VecPred == FCmpInst::FCMP_UNO))
+      Factor = 3; // fcmxx+fcmyy+or
+    else if (isa<ScalableVectorType>(ValTy) &&
+             (VecPred == FCmpInst::FCMP_ONE || VecPred == FCmpInst::FCMP_UEQ))
+      Factor = 3; // fcmxx+fcmyy+or
+
+    return Factor * (CostKind == TTI::TCK_Latency ? 2 : LT.first);
  }
 
   // Treat the icmp in icmp(and, 0) or icmp(and, -1/1) when it can be folded to
@@ -4371,7 +4420,7 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
   // comparison is not unsigned. FIXME: Enable for non-throughput cost kinds
   // providing it will not cause performance regressions.
   if (CostKind == TTI::TCK_RecipThroughput && ValTy->isIntegerTy() &&
-      ISD == ISD::SETCC && I && !CmpInst::isUnsigned(VecPred) &&
+      Opcode == Instruction::ICmp && I && !CmpInst::isUnsigned(VecPred) &&
       TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&
       match(I->getOperand(0), m_And(m_Value(), m_Value()))) {
     if (match(I->getOperand(1), m_Zero()))
@@ -6235,10 +6284,17 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
     }
   }
 
-  auto ShouldSinkCondition = [](Value *Cond) -> bool {
+  auto ShouldSinkCondition = [](Value *Cond,
+                                SmallVectorImpl<Use *> &Ops) -> bool {
+    if (!isa<IntrinsicInst>(Cond))
+      return false;
     auto *II = dyn_cast<IntrinsicInst>(Cond);
-    return II && II->getIntrinsicID() == Intrinsic::vector_reduce_or &&
-           isa<ScalableVectorType>(II->getOperand(0)->getType());
+    if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
+        !isa<ScalableVectorType>(II->getOperand(0)->getType()))
+      return false;
+    if (isa<CmpInst>(II->getOperand(0)))
+      Ops.push_back(&II->getOperandUse(0));
+    return true;
   };
 
   switch (I->getOpcode()) {
@@ -6254,7 +6310,7 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
     }
     break;
   case Instruction::Select: {
-    if (!ShouldSinkCondition(I->getOperand(0)))
+    if (!ShouldSinkCondition(I->getOperand(0), Ops))
       return false;
 
     Ops.push_back(&I->getOperandUse(0));
@@ -6264,7 +6320,7 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
     if (cast<BranchInst>(I)->isUnconditional())
       return false;
 
-    if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition()))
+    if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition(), Ops))
      return false;
 
     Ops.push_back(&I->getOperandUse(0));
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 010d0aaa..2155ace 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -125,7 +125,7 @@ struct AArch64OutgoingValueAssigner
     bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
 
     bool Res;
-    if (Info.IsFixed && !UseVarArgsCCForFixed) {
+    if (!Flags.isVarArg() && !UseVarArgsCCForFixed) {
       if (!IsReturn)
         applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
       Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
@@ -361,7 +361,7 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
     unsigned MaxSize = MemTy.getSizeInBytes() * 8;
     // For varargs, we always want to extend them to 8 bytes, in which case
     // we disable setting a max.
-    if (!Arg.IsFixed)
+    if (Arg.Flags[0].isVarArg())
       MaxSize = 0;
 
     Register ValVReg = Arg.Regs[RegIndex];
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index d905692..f359731 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1697,7 +1697,7 @@ bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
   emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
                 Pred);
   AArch64CC::CondCode CC1, CC2;
-  changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
+  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
   MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
   if (CC2 != AArch64CC::AL)
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 6912caf..7a2b679 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -79,8 +79,7 @@ public:
   }
 
   void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
-                  MutableArrayRef<char> Data, uint64_t Value,
-                  bool IsResolved) override;
+                  uint8_t *Data, uint64_t Value, bool IsResolved) override;
 
   bool fixupNeedsRelaxation(const MCFixup &Fixup,
                             uint64_t Value) const override;
@@ -421,9 +420,8 @@ static bool shouldForceRelocation(const MCFixup &Fixup) {
 }
 
 void AArch64AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
-                                   const MCValue &Target,
-                                   MutableArrayRef<char> Data, uint64_t Value,
-                                   bool IsResolved) {
+                                   const MCValue &Target, uint8_t *Data,
+                                   uint64_t Value, bool IsResolved) {
   if (shouldForceRelocation(Fixup))
     IsResolved = false;
   maybeAddReloc(F, Fixup, Target, Value, IsResolved);
@@ -460,8 +458,8 @@ void AArch64AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
   // Shift the value into position.
   Value <<= Info.TargetOffset;
 
-  unsigned Offset = Fixup.getOffset();
-  assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!");
+  assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+         "Invalid fixup offset!");
 
   // Used to point to big endian bytes.
   unsigned FulleSizeInBytes = getFixupKindContainereSizeInBytes(Fixup.getKind());
@@ -471,15 +469,16 @@ void AArch64AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
   if (FulleSizeInBytes == 0) {
     // Handle as little-endian
     for (unsigned i = 0; i != NumBytes; ++i) {
-      Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+      Data[i] |= uint8_t((Value >> (i * 8)) & 0xff);
     }
   } else {
     // Handle as big-endian
-    assert((Offset + FulleSizeInBytes) <= Data.size() && "Invalid fixup size!");
+    assert(Fixup.getOffset() + FulleSizeInBytes <= F.getSize() &&
+           "Invalid fixup size!");
     assert(NumBytes <= FulleSizeInBytes && "Invalid fixup size!");
     for (unsigned i = 0; i != NumBytes; ++i) {
       unsigned Idx = FulleSizeInBytes - 1 - i;
-      Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
+      Data[Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
     }
   }
 
@@ -492,9 +491,9 @@ void AArch64AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
     // If the immediate is negative, generate MOVN else MOVZ.
     // (Bit 30 = 0) ==> MOVN, (Bit 30 = 1) ==> MOVZ.
     if (SignedValue < 0)
-      Data[Offset + 3] &= ~(1 << 6);
+      Data[3] &= ~(1 << 6);
     else
-      Data[Offset + 3] |= (1 << 6);
+      Data[3] |= (1 << 6);
   }
 }
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 7618a57..45ac023 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -96,8 +96,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(const MCFixup &Fixup,
   case AArch64::S_TPREL:
   case AArch64::S_TLSDESC:
   case AArch64::S_TLSDESC_AUTH:
-    if (auto *SA = Target.getAddSym())
-      cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+    if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+      static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
     break;
   default:
     break;
@@ -488,7 +488,8 @@ bool AArch64ELFObjectWriter::needsRelocateWithSymbol(const MCValue &Val,
   // this global needs to be tagged. In addition, the linker needs to know
   // whether to emit a special addend when relocating `end` symbols, and this
   // can only be determined by the attributes of the symbol itself.
-  if (Val.getAddSym() && cast<MCSymbolELF>(Val.getAddSym())->isMemtag())
+  if (Val.getAddSym() &&
+      static_cast<const MCSymbolELF *>(Val.getAddSym())->isMemtag())
     return true;
 
   if ((Val.getSpecifier() & AArch64::S_GOT) == AArch64::S_GOT)
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 6257e99..14547e3 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -418,7 +418,8 @@ private:
   }
 
   MCSymbol *emitMappingSymbol(StringRef Name) {
-    auto *Symbol = cast<MCSymbolELF>(getContext().createLocalSymbol(Name));
+    auto *Symbol =
+        static_cast<MCSymbolELF *>(getContext().createLocalSymbol(Name));
     emitLabel(Symbol);
     return Symbol;
   }
@@ -455,7 +456,7 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
 
 void AArch64TargetELFStreamer::emitDirectiveVariantPCS(MCSymbol *Symbol) {
   getStreamer().getAssembler().registerSymbol(*Symbol);
-  cast<MCSymbolELF>(Symbol)->setOther(ELF::STO_AARCH64_VARIANT_PCS);
+  static_cast<MCSymbolELF *>(Symbol)->setOther(ELF::STO_AARCH64_VARIANT_PCS);
 }
 
 void AArch64TargetELFStreamer::finish() {
@@ -541,7 +542,7 @@ void AArch64TargetELFStreamer::finish() {
   MCSectionELF *MemtagSec = nullptr;
   for (const MCSymbol &Symbol : Asm.symbols()) {
-    const auto &Sym = cast<MCSymbolELF>(Symbol);
+    auto &Sym = static_cast<const MCSymbolELF &>(Symbol);
     if (Sym.isMemtag()) {
       MemtagSec = Ctx.getELFSection(".memtag.globals.static",
                                     ELF::SHT_AARCH64_MEMTAG_GLOBALS_STATIC, 0);
@@ -556,7 +557,7 @@ void AArch64TargetELFStreamer::finish() {
   S.switchSection(MemtagSec);
   const auto *Zero = MCConstantExpr::create(0, Ctx);
   for (const MCSymbol &Symbol : Asm.symbols()) {
-    const auto &Sym = cast<MCSymbolELF>(Symbol);
+    auto &Sym = static_cast<const MCSymbolELF &>(Symbol);
     if (!Sym.isMemtag())
       continue;
     auto *SRE = MCSymbolRefExpr::create(&Sym, Ctx);