aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
author: Felix (Ting Wang) <Ting.Wang.SH@ibm.com> 2024-04-12 08:18:01 +0800
committer: GitHub <noreply@github.com> 2024-04-12 08:18:01 +0800
commit: 09d51a841dcfbc41c3d7f3274b109b5f9fb09bb0 (patch)
tree: 8fd744c659f86c435238aba0e6047a085687426d /llvm/lib
parent: bf1d7b8df287d69ee265b91be40dec37267b2d5c (diff)
download: llvm-09d51a841dcfbc41c3d7f3274b109b5f9fb09bb0.zip
llvm-09d51a841dcfbc41c3d7f3274b109b5f9fb09bb0.tar.gz
llvm-09d51a841dcfbc41c3d7f3274b109b5f9fb09bb0.tar.bz2
[PowerPC][AIX] Enable aix-small-local-dynamic-tls target attribute (#86641)
Following the aix-small-local-exec-tls target attribute, this patch adds a target attribute (formally named aix-small-local-dynamic-tls) for an AIX-specific option in llc that informs the compiler that it can use a faster access sequence for the local-dynamic TLS model when TLS variables are less than ~32KB in size. The patch either produces an addi/la with a displacement off of the module handle (the return value from .__tls_get_mod) when only the address is calculated, or it produces an addi/la followed by a load/store when the address is calculated and used for further accesses.
---------
Co-authored-by: Amy Kwan <amy.kwan1@ibm.com>
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp | 4
-rw-r--r-- llvm/lib/Target/PowerPC/PPC.td | 9
-rw-r--r-- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 20
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 32
-rw-r--r-- llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 13
-rw-r--r-- llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 26
6 files changed, 75 insertions, 29 deletions
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index f4998e9..714ce64 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -71,6 +71,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16};
case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForHalf16};
+ case MCSymbolRefExpr::VK_PPC_AIX_TLSLD:
+ return {XCOFF::RelocationType::R_TLS_LD, SignAndSizeForHalf16};
}
} break;
case PPC::fixup_ppc_half16ds:
@@ -86,6 +88,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
return {XCOFF::RelocationType::R_TOCL, 15};
case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
return {XCOFF::RelocationType::R_TLS_LE, 15};
+ case MCSymbolRefExpr::VK_PPC_AIX_TLSLD:
+ return {XCOFF::RelocationType::R_TLS_LD, 15};
}
} break;
case PPC::fixup_ppc_br24:
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 535616d..b962ed2 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -329,6 +329,15 @@ def FeatureAIXLocalExecTLS :
"Produce a TOC-free local-exec TLS sequence for this function "
"for 64-bit AIX">;
+// Specifies that local-dynamic TLS accesses in any function with this target
+// attribute should use the optimized sequence (where the offset is an immediate
+// off the module-handle, for which the linker might add fix-up code if the
+// immediate is too large).
+def FeatureAIXLocalDynamicTLS :
+ SubtargetFeature<"aix-small-local-dynamic-tls", "HasAIXSmallLocalDynamicTLS",
+ "true", "Produce a faster local-dynamic TLS sequence for this "
+ "function for 64-bit AIX">;
+
def FeaturePredictableSelectIsExpensive :
SubtargetFeature<"predictable-select-expensive",
"PredictableSelectIsExpensive",
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 16942c6..1c57b92 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -803,7 +803,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
const bool IsPPC64 = Subtarget->isPPC64();
const bool IsAIX = Subtarget->isAIXABI();
- const bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
+ const bool HasAIXSmallLocalTLS = Subtarget->hasAIXSmallLocalExecTLS() ||
+ Subtarget->hasAIXSmallLocalDynamicTLS();
const Module *M = MF->getFunction().getParent();
PICLevel::Level PL = M->getPICLevel();
@@ -1612,11 +1613,11 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::LFD:
case PPC::STFD:
case PPC::ADDI8: {
- // A faster non-TOC-based local-exec sequence is represented by `addi`
- // or a load/store instruction (that directly loads or stores off of the
- // thread pointer) with an immediate operand having the MO_TPREL_FLAG.
+ // A faster non-TOC-based local-[exec|dynamic] sequence is represented by
+ // `addi` or a load/store instruction (off of the thread pointer or module
+ // handle) with an immediate operand having MO_TPREL_FLAG or MO_TLSLD_FLAG.
// Such instructions do not otherwise arise.
- if (!HasAIXSmallLocalExecTLS)
+ if (!HasAIXSmallLocalTLS)
break;
bool IsMIADDI8 = MI->getOpcode() == PPC::ADDI8;
unsigned OpNum = IsMIADDI8 ? 2 : 1;
@@ -1624,7 +1625,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
unsigned Flag = MO.getTargetFlags();
if (Flag == PPCII::MO_TPREL_FLAG ||
Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
- Flag == PPCII::MO_TPREL_PCREL_FLAG) {
+ Flag == PPCII::MO_TPREL_PCREL_FLAG || Flag == PPCII::MO_TLSLD_FLAG) {
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
const MCExpr *Expr = getAdjustedLocalExecExpr(MO, MO.getOffset());
@@ -1672,7 +1673,12 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
const GlobalValue *GValue = MO.getGlobal();
- assert(TM.getTLSModel(GValue) == TLSModel::LocalExec &&
+ // TODO: Handle the aix-small-local-dynamic-tls non-zero offset case.
+ TLSModel::Model Model = TM.getTLSModel(GValue);
+ if (Model == TLSModel::LocalDynamic) {
+ return nullptr;
+ }
+ assert(Model == TLSModel::LocalExec &&
"Only local-exec accesses are handled!");
bool IsGlobalADeclaration = GValue->isDeclarationForLinker();
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 52d5b71..d27932f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -153,10 +153,10 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
-// A faster local-exec TLS access sequence (enabled with the
-// -maix-small-local-exec-tls option) can be produced for TLS variables;
-// consistent with the IBM XL compiler, we apply a max size of slightly under
-// 32KB.
+// A faster local-[exec|dynamic] TLS access sequence (enabled with the
+// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
+// variables; consistent with the IBM XL compiler, we apply a max size of
+// slightly under 32KB.
constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;
// FIXME: Remove this once the bug has been fixed!
@@ -3434,6 +3434,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
}
if (Model == TLSModel::LocalDynamic) {
+ bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
+
+ // We do not implement the 32-bit version of the faster access sequence
+ // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
+ if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
+ report_fatal_error("The small-local-dynamic TLS access sequence is "
+ "currently only supported on AIX (64-bit mode).");
+
// For local-dynamic on AIX, we need to generate one TOC entry for each
// variable offset, and a single module-handle TOC entry for the entire
// file.
@@ -3454,6 +3462,22 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
SDValue ModuleHandle =
DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
+ // With the -maix-small-local-dynamic-tls option, produce a faster access
+ // sequence for local-dynamic TLS variables where the offset from the
+ // module-handle is encoded as an immediate operand.
+ //
+ // We only utilize the faster local-dynamic access sequence when the TLS
+ // variable has a size within the policy limit. We treat types that are
+ // not sized or are empty as being over the policy size limit.
+ if (HasAIXSmallLocalDynamicTLS) {
+ Type *GVType = GV->getValueType();
+ if (GVType->isSized() && !GVType->isEmptyTy() &&
+ GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
+ AIXSmallTlsPolicySizeLimit)
+ return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
+ ModuleHandle);
+ }
+
return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
}
diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 9a3ca5a..c05bb37 100644
--- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -96,15 +96,18 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL;
else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG)
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL;
- else if (MO.getTargetFlags() == PPCII::MO_TPREL_FLAG) {
+ else if (MO.getTargetFlags() == PPCII::MO_TPREL_FLAG ||
+ MO.getTargetFlags() == PPCII::MO_TLSLD_FLAG) {
assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
TLSModel::Model Model = TM.getTLSModel(MO.getGlobal());
- // For the local-exec TLS model, we may generate the offset from the TLS
- // base as an immediate operand (instead of using a TOC entry).
- // Set the relocation type in case the result is used for purposes other
- // than a TOC reference. In TOC reference cases, this result is discarded.
+ // For the local-[exec|dynamic] TLS model, we may generate the offset from
+ // the TLS base as an immediate operand (instead of using a TOC entry). Set
+ // the relocation type in case the result is used for purposes other than a
+ // TOC reference. In TOC reference cases, this result is discarded.
if (Model == TLSModel::LocalExec)
RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE;
+ else if (Model == TLSModel::LocalDynamic)
+ RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLD;
}
const MachineInstr *MI = MO.getParent();
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 653d9bd..d172255 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -124,22 +124,22 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
// Determine endianness.
IsLittleEndian = TM.isLittleEndian();
- if (HasAIXSmallLocalExecTLS) {
+ if (HasAIXSmallLocalExecTLS || HasAIXSmallLocalDynamicTLS) {
if (!TargetTriple.isOSAIX() || !IsPPC64)
- report_fatal_error(
- "The aix-small-local-exec-tls attribute is only supported on AIX in "
- "64-bit mode.\n",
- false);
- // The aix-small-local-exec-tls attribute should only be used with
+ report_fatal_error("The aix-small-local-[exec|dynamic]-tls attribute is "
+ "only supported on AIX in "
+ "64-bit mode.\n",
+ false);
+ // The aix-small-local-[exec|dynamic]-tls attribute should only be used with
// -data-sections, as having data sections turned off with this option
- // is not ideal for performance. Moreover, the small-local-exec-tls region
- // is a limited resource, and should not be used for variables that may
- // be replaced.
+ // is not ideal for performance. Moreover, the
+ // small-local-[exec|dynamic]-tls region is a limited resource, and should
+ // not be used for variables that may be replaced.
if (!TM.getDataSections())
- report_fatal_error(
- "The aix-small-local-exec-tls attribute can only be specified with "
- "-data-sections.\n",
- false);
+ report_fatal_error("The aix-small-local-[exec|dynamic]-tls attribute can "
+ "only be specified with "
+ "-data-sections.\n",
+ false);
}
}