diff options
author | Felix (Ting Wang) <Ting.Wang.SH@ibm.com> | 2024-04-12 08:18:01 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-12 08:18:01 +0800 |
commit | 09d51a841dcfbc41c3d7f3274b109b5f9fb09bb0 (patch) | |
tree | 8fd744c659f86c435238aba0e6047a085687426d /llvm/lib | |
parent | bf1d7b8df287d69ee265b91be40dec37267b2d5c (diff) | |
download | llvm-09d51a841dcfbc41c3d7f3274b109b5f9fb09bb0.zip llvm-09d51a841dcfbc41c3d7f3274b109b5f9fb09bb0.tar.gz llvm-09d51a841dcfbc41c3d7f3274b109b5f9fb09bb0.tar.bz2 |
[PowerPC][AIX] Enable aix-small-local-dynamic-tls target attribute (#86641)
Following the aix-small-local-exec-tls target attribute, this patch adds
a target attribute for an AIX-specific option in llc that informs the
compiler that it can use a faster access sequence for the local-dynamic
TLS model (formally named aix-small-local-dynamic-tls) when TLS
variables are less than ~32KB in size.
The patch either produces an addi/la with a displacement off of the module
handle (the return value from .__tls_get_mod) when only the address is
calculated, or it produces an addi/la followed by a load/store when the
address is calculated and then used for further accesses.
---------
Co-authored-by: Amy Kwan <amy.kwan1@ibm.com>
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPC.td | 9 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 20 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 32 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 13 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 26 |
6 files changed, 75 insertions, 29 deletions
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp index f4998e9..714ce64 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp @@ -71,6 +71,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize( return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16}; case MCSymbolRefExpr::VK_PPC_AIX_TLSLE: return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForHalf16}; + case MCSymbolRefExpr::VK_PPC_AIX_TLSLD: + return {XCOFF::RelocationType::R_TLS_LD, SignAndSizeForHalf16}; } } break; case PPC::fixup_ppc_half16ds: @@ -86,6 +88,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize( return {XCOFF::RelocationType::R_TOCL, 15}; case MCSymbolRefExpr::VK_PPC_AIX_TLSLE: return {XCOFF::RelocationType::R_TLS_LE, 15}; + case MCSymbolRefExpr::VK_PPC_AIX_TLSLD: + return {XCOFF::RelocationType::R_TLS_LD, 15}; } } break; case PPC::fixup_ppc_br24: diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 535616d..b962ed2 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -329,6 +329,15 @@ def FeatureAIXLocalExecTLS : "Produce a TOC-free local-exec TLS sequence for this function " "for 64-bit AIX">; +// Specifies that local-dynamic TLS accesses in any function with this target +// attribute should use the optimized sequence (where the offset is an immediate +// off the module-handle for which the linker might add fix-up code for if the +// immediate is too large). 
+def FeatureAIXLocalDynamicTLS : + SubtargetFeature<"aix-small-local-dynamic-tls", "HasAIXSmallLocalDynamicTLS", + "true", "Produce a faster local-dynamic TLS sequence for this " + "function for 64-bit AIX">; + def FeaturePredictableSelectIsExpensive : SubtargetFeature<"predictable-select-expensive", "PredictableSelectIsExpensive", diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 16942c6..1c57b92 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -803,7 +803,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { MCInst TmpInst; const bool IsPPC64 = Subtarget->isPPC64(); const bool IsAIX = Subtarget->isAIXABI(); - const bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS(); + const bool HasAIXSmallLocalTLS = Subtarget->hasAIXSmallLocalExecTLS() || + Subtarget->hasAIXSmallLocalDynamicTLS(); const Module *M = MF->getFunction().getParent(); PICLevel::Level PL = M->getPICLevel(); @@ -1612,11 +1613,11 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { case PPC::LFD: case PPC::STFD: case PPC::ADDI8: { - // A faster non-TOC-based local-exec sequence is represented by `addi` - // or a load/store instruction (that directly loads or stores off of the - // thread pointer) with an immediate operand having the MO_TPREL_FLAG. + // A faster non-TOC-based local-[exec|dynamic] sequence is represented by + // `addi` or a load/store instruction (that directly loads or stores off of + // the thread pointer) with an immediate operand having the MO_TPREL_FLAG. // Such instructions do not otherwise arise. - if (!HasAIXSmallLocalExecTLS) + if (!HasAIXSmallLocalTLS) break; bool IsMIADDI8 = MI->getOpcode() == PPC::ADDI8; unsigned OpNum = IsMIADDI8 ? 
2 : 1; @@ -1624,7 +1625,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { unsigned Flag = MO.getTargetFlags(); if (Flag == PPCII::MO_TPREL_FLAG || Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG || - Flag == PPCII::MO_TPREL_PCREL_FLAG) { + Flag == PPCII::MO_TPREL_PCREL_FLAG || Flag == PPCII::MO_TLSLD_FLAG) { LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); const MCExpr *Expr = getAdjustedLocalExecExpr(MO, MO.getOffset()); @@ -1672,7 +1673,12 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO, assert(MO.isGlobal() && "Only expecting a global MachineOperand here!"); const GlobalValue *GValue = MO.getGlobal(); - assert(TM.getTLSModel(GValue) == TLSModel::LocalExec && + // TODO: Handle the aix-small-local-dynamic-tls non-zero offset case. + TLSModel::Model Model = TM.getTLSModel(GValue); + if (Model == TLSModel::LocalDynamic) { + return nullptr; + } + assert(Model == TLSModel::LocalExec && "Only local-exec accesses are handled!"); bool IsGlobalADeclaration = GValue->isDeclarationForLinker(); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 52d5b71..d27932f 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -153,10 +153,10 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl); static const char AIXSSPCanaryWordName[] = "__ssp_canary_word"; -// A faster local-exec TLS access sequence (enabled with the -// -maix-small-local-exec-tls option) can be produced for TLS variables; -// consistent with the IBM XL compiler, we apply a max size of slightly under -// 32KB. +// A faster local-[exec|dynamic] TLS access sequence (enabled with the +// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS +// variables; consistent with the IBM XL compiler, we apply a max size of +// slightly under 32KB. 
constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751; // FIXME: Remove this once the bug has been fixed! @@ -3434,6 +3434,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op, } if (Model == TLSModel::LocalDynamic) { + bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS(); + + // We do not implement the 32-bit version of the faster access sequence + // for local-dynamic that is controlled by -maix-small-local-dynamic-tls. + if (!Is64Bit && HasAIXSmallLocalDynamicTLS) + report_fatal_error("The small-local-dynamic TLS access sequence is " + "currently only supported on AIX (64-bit mode)."); + // For local-dynamic on AIX, we need to generate one TOC entry for each // variable offset, and a single module-handle TOC entry for the entire // file. @@ -3454,6 +3462,22 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op, SDValue ModuleHandle = DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC); + // With the -maix-small-local-dynamic-tls option, produce a faster access + // sequence for local-dynamic TLS variables where the offset from the + // module-handle is encoded as an immediate operand. + // + // We only utilize the faster local-dynamic access sequence when the TLS + // variable has a size within the policy limit. We treat types that are + // not sized or are empty as being over the policy size limit. 
+ if (HasAIXSmallLocalDynamicTLS) { + Type *GVType = GV->getValueType(); + if (GVType->isSized() && !GVType->isEmptyTy() && + GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <= + AIXSmallTlsPolicySizeLimit) + return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, + ModuleHandle); + } + return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset); } diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 9a3ca5a..c05bb37 100644 --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -96,15 +96,18 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL; else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG) RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL; - else if (MO.getTargetFlags() == PPCII::MO_TPREL_FLAG) { + else if (MO.getTargetFlags() == PPCII::MO_TPREL_FLAG || + MO.getTargetFlags() == PPCII::MO_TLSLD_FLAG) { assert(MO.isGlobal() && "Only expecting a global MachineOperand here!"); TLSModel::Model Model = TM.getTLSModel(MO.getGlobal()); - // For the local-exec TLS model, we may generate the offset from the TLS - // base as an immediate operand (instead of using a TOC entry). - // Set the relocation type in case the result is used for purposes other - // than a TOC reference. In TOC reference cases, this result is discarded. + // For the local-[exec|dynamic] TLS model, we may generate the offset from + // the TLS base as an immediate operand (instead of using a TOC entry). Set + // the relocation type in case the result is used for purposes other than a + // TOC reference. In TOC reference cases, this result is discarded. 
if (Model == TLSModel::LocalExec) RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE; + else if (Model == TLSModel::LocalDynamic) + RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLD; } const MachineInstr *MI = MO.getParent(); diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 653d9bd..d172255 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -124,22 +124,22 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, // Determine endianness. IsLittleEndian = TM.isLittleEndian(); - if (HasAIXSmallLocalExecTLS) { + if (HasAIXSmallLocalExecTLS || HasAIXSmallLocalDynamicTLS) { if (!TargetTriple.isOSAIX() || !IsPPC64) - report_fatal_error( - "The aix-small-local-exec-tls attribute is only supported on AIX in " - "64-bit mode.\n", - false); - // The aix-small-local-exec-tls attribute should only be used with + report_fatal_error("The aix-small-local-[exec|dynamic]-tls attribute is " + "only supported on AIX in " + "64-bit mode.\n", + false); + // The aix-small-local-[exec|dynamic]-tls attribute should only be used with // -data-sections, as having data sections turned off with this option - // is not ideal for performance. Moreover, the small-local-exec-tls region - // is a limited resource, and should not be used for variables that may - // be replaced. + // is not ideal for performance. Moreover, the + // small-local-[exec|dynamic]-tls region is a limited resource, and should + // not be used for variables that may be replaced. if (!TM.getDataSections()) - report_fatal_error( - "The aix-small-local-exec-tls attribute can only be specified with " - "-data-sections.\n", - false); + report_fatal_error("The aix-small-local-[exec|dynamic]-tls attribute can " + "only be specified with " + "-data-sections.\n", + false); } } |