aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Zaara Syeda <syzaara@ca.ibm.com> 2024-04-17 09:24:53 -0400
committer GitHub <noreply@github.com> 2024-04-17 09:24:53 -0400
commit 76ad2897480a85532eee93daf041246881772693 (patch)
tree 6313f7678d7d2c63c184c44deb38712cbd61424e
parent 1fc72dbc807fb138cafd05501e2e31beaa574693 (diff)
download llvm-76ad2897480a85532eee93daf041246881772693.zip
llvm-76ad2897480a85532eee93daf041246881772693.tar.gz
llvm-76ad2897480a85532eee93daf041246881772693.tar.bz2
[PowerPC] 32-bit large code-model support for toc-data (#85129)
This patch adds the pseudo op ADDItocL for 32-bit large code-model support for toc-data.
-rw-r--r-- llvm/lib/Target/PowerPC/P10InstrResources.td 2
-rw-r--r-- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp 61
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp 35
-rw-r--r-- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp 1
-rw-r--r-- llvm/lib/Target/PowerPC/PPCInstrInfo.td 4
-rw-r--r-- llvm/lib/Target/PowerPC/PPCMacroFusion.def 10
-rw-r--r-- llvm/test/CodeGen/PowerPC/toc-data.ll 67
7 files changed, 144 insertions, 36 deletions
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 5015ba8..32cebb6 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -881,7 +881,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
// 3 Cycles ALU operations, 1 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
(instrs
- ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL8, LI, LI8,
+ ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, ADDItocL8, LI, LI8,
ADDIC, ADDIC8,
ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
ADDME, ADDME8,
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 1c57b92..6e1002c 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1148,15 +1148,27 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::VariantKind VK = GetVKForMO(MO);
- // Always use TOC on AIX. Map the global address operand to be a reference
- // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
- // reference the storage allocated in the TOC which contains the address of
- // 'MOSymbol'.
- MCSymbol *TOCEntry =
- lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
- const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry,
- MCSymbolRefExpr::VK_PPC_U,
- OutContext);
+ // If the symbol isn't toc-data then use the TOC on AIX.
+ // Map the global address operand to be a reference to the TOC entry we
+ // will synthesize later. 'TOCEntry' is a label used to reference the
+ // storage allocated in the TOC which contains the address of 'MOSymbol'.
+ // If the toc-data attribute is used, the TOC entry contains the data
+ // rather than the address of the MOSymbol.
+ if (![](const MachineOperand &MO) {
+ if (!MO.isGlobal())
+ return false;
+
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal());
+ if (!GV)
+ return false;
+
+ return GV->hasAttribute("toc-data");
+ }(MO)) {
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
+ }
+
+ const MCExpr *Exp = MCSymbolRefExpr::create(
+ MOSymbol, MCSymbolRefExpr::VK_PPC_U, OutContext);
TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
@@ -1273,25 +1285,32 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
+ case PPC::ADDItocL:
case PPC::ADDItocL8: {
- // Transform %xd = ADDItocL8 %xs, @sym
+ // Transform %xd = ADDItocL %xs, @sym
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
- // Change the opcode to ADDI8. If the global address is external, then
- // generate a TOC entry and reference that. Otherwise, reference the
- // symbol directly.
- TmpInst.setOpcode(PPC::ADDI8);
+ unsigned Op = MI->getOpcode();
+
+ // Change the opcode to ADDI8 (ADDItocL8) or LA (ADDItocL, toc-data).
+ TmpInst.setOpcode(Op == PPC::ADDItocL8 ? PPC::ADDI8 : PPC::LA);
const MachineOperand &MO = MI->getOperand(2);
- assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL8.");
+ assert((Op == PPC::ADDItocL8)
+ ? (MO.isGlobal() || MO.isCPI())
+ : MO.isGlobal() && "Invalid operand for ADDItocL8.");
+ assert(!(MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal())) &&
+ "Interposable definitions must use indirect accesses.");
- LLVM_DEBUG(assert(
- !(MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal())) &&
- "Interposable definitions must use indirect access."));
+ // Map the operand to its corresponding MCSymbol.
+ const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+ const MCExpr *Exp = MCSymbolRefExpr::create(
+ MOSymbol,
+ Op == PPC::ADDItocL8 ? MCSymbolRefExpr::VK_PPC_TOC_LO
+ : MCSymbolRefExpr::VK_PPC_L,
+ OutContext);
- const MCExpr *Exp =
- MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this),
- MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext);
TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index af82b6c..2f647da 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -510,7 +510,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
}
// Check if a SDValue has the toc-data attribute.
-static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
+static bool hasTocDataAttr(SDValue Val) {
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
if (!GA)
return false;
@@ -6115,8 +6115,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
assert(isAIXABI && "ELF ABI already handled");
- if (hasTocDataAttr(N->getOperand(0),
- CurDAG->getDataLayout().getPointerSize())) {
+ if (hasTocDataAttr(N->getOperand(0))) {
replaceWith(PPC::ADDItoc, N, MVT::i32);
return;
}
@@ -6128,8 +6127,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
if (isPPC64 && CModel == CodeModel::Small) {
assert(isAIXABI && "ELF ABI handled in common SelectCode");
- if (hasTocDataAttr(N->getOperand(0),
- CurDAG->getDataLayout().getPointerSize())) {
+ if (hasTocDataAttr(N->getOperand(0))) {
replaceWith(PPC::ADDItoc8, N, MVT::i64);
return;
}
@@ -6144,9 +6142,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
" ELF/AIX or 32-bit AIX in the following.");
// Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode
- // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We
- // generate two instructions as described below. The first source operand
- // is a symbol reference. If it must be toc-referenced according to
+ // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code for
+ // non-toc-data symbols.
+ // We generate two instructions as described below. The first source
+ // operand is a symbol reference. If it must be toc-referenced according to
// Subtarget, we generate:
// [32-bit AIX]
// LWZtocL(@sym, ADDIStocHA(%r2, @sym))
@@ -6154,6 +6153,13 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// LDtocL(@sym, ADDIStocHA8(%x2, @sym))
// Otherwise we generate:
// ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
+
+ // For large code model toc-data symbols we generate:
+ // [32-bit AIX]
+ // ADDItocL(ADDIStocHA(%r2, @sym), @sym)
+ // [64-bit AIX]
+ // Currently not supported.
+
SDValue GA = N->getOperand(0);
SDValue TOCbase = N->getOperand(1);
@@ -6161,6 +6167,19 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SDNode *Tmp = CurDAG->getMachineNode(
isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
+ // On AIX, if the symbol has the toc-data attribute, it will be defined
+ // in the TOC entry, so we use an ADDItocL similar to the medium code
+ // model ELF ABI.
+ if (isAIXABI && hasTocDataAttr(GA)) {
+ if (isPPC64)
+ report_fatal_error(
+ "64-bit large code model toc-data not yet supported");
+
+ ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, VT,
+ SDValue(Tmp, 0), GA));
+ return;
+ }
+
if (PPCLowering->isAccessedAsGotIndirect(GA)) {
// If it is accessed as got-indirect, we need an extra LWZ/LD to load
// the address.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 93874d65..b32f178ca 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1090,6 +1090,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(
case PPC::LIS8:
case PPC::ADDIStocHA:
case PPC::ADDIStocHA8:
+ case PPC::ADDItocL:
case PPC::ADDItocL8:
case PPC::LOAD_STACK_GUARD:
case PPC::PPCLdFixedAddr:
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 6423e69..43e3902 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3346,11 +3346,13 @@ def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentr
"#ADDIStocHA",
[(set i32:$rD,
(PPCtoc_entry i32:$reg, tglobaladdr:$disp))]>;
-// Local Data Transform
+// TOC Data Transform AIX
def ADDItoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
"#ADDItoc",
[(set i32:$rD,
(PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+def ADDItocL : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentry32:$disp),
+ "#ADDItocL", []>;
// Get Global (GOT) Base Register offset, from the word immediately preceding
// the function label.
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
index fb6e656..1a61ae2 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -32,7 +32,7 @@
// {addi} followed by one of these {lxvd2x, lxvw4x, lxvdsx, lvebx, lvehx,
// lvewx, lvx, lxsdx}
FUSION_FEATURE(AddiLoad, hasAddiLoadFusion, 2, \
- FUSION_OP_SET(ADDI, ADDI8, ADDItocL8), \
+ FUSION_OP_SET(ADDI, ADDI8, ADDItocL, ADDItocL8), \
FUSION_OP_SET(LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX, \
LVX, LXSDX))
@@ -134,13 +134,13 @@ FUSION_FEATURE(XorisXori, hasWideImmFusion, 1, FUSION_OP_SET(XORIS, XORIS8),
// addis rx,ra,si - addi rt,rx,SI, SI >= 0
FUSION_FEATURE(AddisAddi, hasWideImmFusion, 1,
- FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8),
- FUSION_OP_SET(ADDI, ADDI8, ADDItocL8))
+ FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8, ADDIStocHA),
+ FUSION_OP_SET(ADDI, ADDI8, ADDItocL8, ADDItocL))
// addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
FUSION_FEATURE(AddiAddis, hasWideImmFusion, 1,
- FUSION_OP_SET(ADDI, ADDI8, ADDItocL8),
- FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8))
+ FUSION_OP_SET(ADDI, ADDI8, ADDItocL8, ADDItocL),
+ FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8, ADDIStocHA))
// mtctr - { bcctr,bcctrl }
FUSION_FEATURE(ZeroMoveCTR, hasZeroMoveFusion, -1,
diff --git a/llvm/test/CodeGen/PowerPC/toc-data.ll b/llvm/test/CodeGen/PowerPC/toc-data.ll
index cbf3be9..7f7afe7 100644
--- a/llvm/test/CodeGen/PowerPC/toc-data.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-data.ll
@@ -12,6 +12,14 @@
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST32
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST64
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=large -verify-machineinstrs < %s \
+; RUN: -stop-before=ppc-vsx-copy | FileCheck %s --check-prefix CHECK32LARGE
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=large -verify-machineinstrs < %s | FileCheck %s --check-prefix TEST32LARGE
+
+; Global variables i and f have the toc-data attribute.
+; In the following functions, those writing to or reading from
+; variables i and f should use the toc-data access pattern.
+; All remaining variables should use the regular toc access sequence.
@i = dso_local global i32 0, align 4 #0
@d = dso_local local_unnamed_addr global double 3.141590e+00, align 8
@f = dso_local local_unnamed_addr global float 0x4005BE76C0000000, align 4 #0
@@ -44,6 +52,16 @@ define dso_local void @write_int(i32 signext %in) {
; TEST64: la 4, i[TD](2)
; TEST64-NEXT: stw 3, 0(4)
+; CHECK32LARGE: name: write_int
+; CHECK32LARGE: %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @i
+; CHECK32LARGE-NEXT: %[[SCRATCH2:[0-9]+]]:gprc_and_gprc_nor0 = ADDItocL killed %[[SCRATCH1]], @i
+; CHECK32LARGE-NEXT: STW %{{[0-9]+}}, 0, killed %[[SCRATCH2]] :: (store (s32) into @i)
+
+; FIXME: peephole optimization opportunity for lower part relocation @l to the consuming stw
+; TEST32LARGE: .write_int:
+; TEST32LARGE: addis 4, i[TD]@u(2)
+; TEST32LARGE-NEXT: la 4, i[TD]@l(4)
+; TEST32LARGE-NEXT: stw 3, 0(4)
define dso_local i64 @read_ll() {
entry:
@@ -70,6 +88,15 @@ define dso_local i64 @read_ll() {
; TEST64: ld 3, L..C0(2)
; TEST64-NEXT: ld 3, 0(3)
+; CHECK32LARGE: name: read_ll
+; CHECK32LARGE: %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @ll
+; CHECK32LARGE: LWZtocL @ll, killed %[[SCRATCH1]] :: (load (s32) from got)
+
+; TEST32LARGE: .read_ll:
+; TEST32LARGE: addis 3, L..C0@u(2)
+; TEST32LARGE-NEXT: lwz 4, L..C0@l(3)
+; TEST32LARGE-NEXT: lwz 3, 0(4)
+; TEST32LARGE-NEXT: lwz 4, 4(4)
define dso_local float @read_float() {
entry:
@@ -96,6 +123,16 @@ define dso_local float @read_float() {
; TEST64: la 3, f[TD](2)
; TEST64-NEXT: lfs 1, 0(3)
+; CHECK32LARGE: name: read_float
+; CHECK32LARGE: %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @f
+; CHECK32LARGE-NEXT: %[[SCRATCH2:[0-9]+]]:gprc_and_gprc_nor0 = ADDItocL killed %[[SCRATCH1]], @f
+; CHECK32LARGE-NEXT: LFS 0, killed %[[SCRATCH2]] :: (dereferenceable load (s32) from @f)
+
+; FIXME: peephole optimization opportunity for lower part relocation @l to the consuming lfs
+; TEST32LARGE: .read_float:
+; TEST32LARGE: addis 3, f[TD]@u(2)
+; TEST32LARGE-NEXT: la 3, f[TD]@l(3)
+; TEST32LARGE-NEXT: lfs 1, 0(3)
define dso_local void @write_double(double %in) {
entry:
@@ -121,6 +158,14 @@ define dso_local void @write_double(double %in) {
; TEST64: ld 3, L..C1(2)
; TEST64-NEXT: stfd 1, 0(3)
+; CHECK32LARGE: name: write_double
+; CHECK32LARGE: %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @d
+; CHECK32LARGE: LWZtocL @d, killed %[[SCRATCH1]] :: (load (s32) from got)
+
+; TEST32LARGE: .write_double:
+; TEST32LARGE: addis 3, L..C1@u(2)
+; TEST32LARGE-NEXT: lwz 3, L..C1@l(3)
+; TEST32LARGE-NEXT: stfd 1, 0(3)
define dso_local nonnull ptr @addr() {
entry:
@@ -144,6 +189,15 @@ define dso_local nonnull ptr @addr() {
; TEST64: .addr
; TEST64: la 3, i[TD](2)
+; CHECK32LARGE: name: addr
+; CHECK32LARGE: %[[SCRATCH1:[0-9]+]]:gprc_and_gprc_nor0 = ADDIStocHA $r2, @i
+; CHECK32LARGE-NEXT: %[[SCRATCH2:[0-9]+]]:gprc = ADDItocL killed %[[SCRATCH1]], @i
+; CHECK32LARGE-NEXT: $r3 = COPY %[[SCRATCH2]]
+
+; TEST32LARGE: .addr:
+; TEST32LARGE: addis 3, i[TD]@u(2)
+; TEST32LARGE-NEXT: la 3, i[TD]@l(3)
+
; TEST32: .toc
; TEST32: .tc ll[TC],ll[RW]
; TEST32-NOT: .csect ll[TD]
@@ -170,4 +224,17 @@ define dso_local nonnull ptr @addr() {
; TEST64-NEXT: .globl f[TD]
; TEST64-NOT: .tc f[TD],f[RW]
+; TEST32LARGE: .toc
+; TEST32LARGE: .tc ll[TE],ll[RW]
+; TEST32LARGE-NOT: .csect ll[TD]
+; TEST32LARGE: .tc d[TE],d[RW]
+; TEST32LARGE-NOT: .csect d[TD],2
+; TEST32LARGE: .csect i[TD],2
+; TEST32LARGE-NEXT: .globl i[TD]
+; TEST32LARGE-NEXT: .align 2
+; TEST32LARGE-NOT: .tc i[TE],i[RW]
+; TEST32LARGE: .csect f[TD],2
+; TEST32LARGE-NEXT: .globl f[TD]
+; TEST32LARGE-NOT: .tc f[TE],f[RW]
+
attributes #0 = { "toc-data" }