| field | value | date |
|---|---|---|
| author | Jessica Paquette <jpaquette@apple.com> | 2020-09-09 09:45:54 -0700 |
| committer | Jessica Paquette <jpaquette@apple.com> | 2020-09-09 15:14:46 -0700 |
| commit | 480e7f43a22578beaa2edc7a271e77793222a1c3 | |
| tree | 4a034ac6b511a47d99cbf045ece7046c87bcbc92 | |
| parent | 9969c317ff0877ed6155043422c70e1d4c028a35 | |

**[AArch64][GlobalISel] Share address mode selection code for memops**

We were missing support for the G_ADD_LOW + ADRP folding optimization in the
manual selection code for G_LOAD, G_STORE, and G_ZEXTLOAD.
As a result, we were missing cases like this:
```
@foo = external hidden global i32*
define void @baz(i32* %0) {
store i32* %0, i32** @foo
ret void
}
```
https://godbolt.org/z/16r7ad
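Concretely, the missed fold is the difference between materializing the full address and folding the low 12 bits of it into the store. A rough before/after of the expected codegen for `@baz` (illustrative assembly; the register choice is not taken from the commit):
```
// before: ADRP and G_ADD_LOW selected separately
adrp    x8, foo
add     x8, x8, :lo12:foo
str     x0, [x8]
ret

// after: the :lo12: offset is folded into the STRXui addressing mode
adrp    x8, foo
str     x0, [x8, :lo12:foo]
ret
```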
This functionality already existed in the addressing mode functions for the
importer. So, this patch makes the manual selection code use
`selectAddrModeIndexed` rather than duplicating work.
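In outline, the shared path works like this (a condensed sketch of the logic the patch introduces, assuming the surrounding `select()` context from the diff below; `selectAddrModeIndexed` follows the usual GlobalISel complex-pattern convention of returning an optional list of operand-renderer callbacks):
```cpp
// Try to fold the address computation (G_PTR_ADD, frame index, or
// ADRP + G_ADD_LOW) into an unsigned-immediate addressing mode.
auto AddrModeFns = selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
if (!AddrModeFns) {
  // Nothing folded: keep the original pointer operand and a zero offset.
  I.setDesc(TII.get(NewOpc));
  I.addOperand(MachineOperand::CreateImm(0));
} else {
  // Folded: build the selected load/store and let each renderer callback
  // append its operands (e.g. the ADRP result plus a :lo12: immediate).
  auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
  IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
  NewInst.cloneMemRefs(I);
  for (auto &Fn : *AddrModeFns)
    Fn(NewInst);
  I.eraseFromParent();
}
```
Returning renderer callbacks rather than concrete operands keeps the caller agnostic about which addressing mode actually matched.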
This is a 0.2% geomean code size improvement for CTMark at -O3.
There is one code size increase (0.1% on lencod), which is likely because
`selectAddrModeIndexed` doesn't look through constants.
Differential Revision: https://reviews.llvm.org/D87397
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 87 |
| -rw-r--r-- | llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir | 20 |

2 files changed, 64 insertions, 43 deletions
```diff
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index a8d6818..228db83 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2260,18 +2260,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     }
 
     auto &MemOp = **I.memoperands_begin();
+    uint64_t MemSizeInBytes = MemOp.getSize();
     if (MemOp.isAtomic()) {
       // For now we just support s8 acquire loads to be able to compile stack
       // protector code.
       if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
-          MemOp.getSize() == 1) {
+          MemSizeInBytes == 1) {
         I.setDesc(TII.get(AArch64::LDARB));
         return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
       }
       LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
       return false;
     }
-    unsigned MemSizeInBits = MemOp.getSize() * 8;
+    unsigned MemSizeInBits = MemSizeInBytes * 8;
 
     const Register PtrReg = I.getOperand(1).getReg();
 #ifndef NDEBUG
@@ -2286,78 +2287,78 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     const Register ValReg = I.getOperand(0).getReg();
     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
 
-    const unsigned NewOpc =
-        selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
-    if (NewOpc == I.getOpcode())
-      return false;
-
-    I.setDesc(TII.get(NewOpc));
-
-    uint64_t Offset = 0;
-    auto *PtrMI = MRI.getVRegDef(PtrReg);
-
-    // Try to fold a GEP into our unsigned immediate addressing mode.
-    if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
-      if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
-        int64_t Imm = *COff;
-        const unsigned Size = MemSizeInBits / 8;
-        const unsigned Scale = Log2_32(Size);
-        if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
-          Register Ptr2Reg = PtrMI->getOperand(1).getReg();
-          I.getOperand(1).setReg(Ptr2Reg);
-          PtrMI = MRI.getVRegDef(Ptr2Reg);
-          Offset = Imm / Size;
-        }
+    // Helper lambda for partially selecting I. Either returns the original
+    // instruction with an updated opcode, or a new instruction.
+    auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
+      bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
+      const unsigned NewOpc =
+          selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
+      if (NewOpc == I.getOpcode())
+        return nullptr;
+      // Check if we can fold anything into the addressing mode.
+      auto AddrModeFns =
+          selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
+      if (!AddrModeFns) {
+        // Can't fold anything. Use the original instruction.
+        I.setDesc(TII.get(NewOpc));
+        I.addOperand(MachineOperand::CreateImm(0));
+        return &I;
       }
-    }
 
-    // If we haven't folded anything into our addressing mode yet, try to fold
-    // a frame index into the base+offset.
-    if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
-      I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
+      // Folded something. Create a new instruction and return it.
+      auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
+      IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
+      NewInst.cloneMemRefs(I);
+      for (auto &Fn : *AddrModeFns)
+        Fn(NewInst);
+      I.eraseFromParent();
+      return &*NewInst;
+    };
 
-    I.addOperand(MachineOperand::CreateImm(Offset));
+    MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
+    if (!LoadStore)
+      return false;
 
     // If we're storing a 0, use WZR/XZR.
     if (Opcode == TargetOpcode::G_STORE) {
       auto CVal = getConstantVRegValWithLookThrough(
-          ValReg, MRI, /*LookThroughInstrs = */ true,
+          LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
           /*HandleFConstants = */ false);
       if (CVal && CVal->Value == 0) {
-        unsigned Opc = I.getOpcode();
-        switch (Opc) {
+        switch (LoadStore->getOpcode()) {
         case AArch64::STRWui:
         case AArch64::STRHHui:
         case AArch64::STRBBui:
-          I.getOperand(0).setReg(AArch64::WZR);
+          LoadStore->getOperand(0).setReg(AArch64::WZR);
           break;
         case AArch64::STRXui:
-          I.getOperand(0).setReg(AArch64::XZR);
+          LoadStore->getOperand(0).setReg(AArch64::XZR);
           break;
         }
       }
     }
 
     if (IsZExtLoad) {
-      // The zextload from a smaller type to i32 should be handled by the importer.
-      if (MRI.getType(ValReg).getSizeInBits() != 64)
+      // The zextload from a smaller type to i32 should be handled by the
+      // importer.
+      if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
         return false;
       // If we have a ZEXTLOAD then change the load's type to be a narrower reg
-      //and zero_extend with SUBREG_TO_REG.
+      // and zero_extend with SUBREG_TO_REG.
       Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
-      Register DstReg = I.getOperand(0).getReg();
-      I.getOperand(0).setReg(LdReg);
+      Register DstReg = LoadStore->getOperand(0).getReg();
+      LoadStore->getOperand(0).setReg(LdReg);
 
-      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+      MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
       MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
           .addImm(0)
           .addUse(LdReg)
           .addImm(AArch64::sub_32);
-      constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+      constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
       return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
                                           MRI);
     }
 
-    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+    return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
   }
   case TargetOpcode::G_SMULH:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
index db355df..05038b4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
@@ -39,6 +39,9 @@
   define void @store_8xi16(<8 x i16> %v, <8 x i16>* %ptr) { ret void }
   define void @store_16xi8(<16 x i8> %v, <16 x i8>* %ptr) { ret void }
 
+  @x = external hidden local_unnamed_addr global i32*, align 8
+  define void @store_adrp_add_low() { ret void }
+
 ...
 ---
@@ -600,3 +603,20 @@ body: |
     RET_ReallyLR
 
 ...
+---
+name:            store_adrp_add_low
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: store_adrp_add_low
+    ; CHECK: liveins: $x0
+    ; CHECK: %copy:gpr64 = COPY $x0
+    ; CHECK: %adrp:gpr64common = ADRP target-flags(aarch64-page) @x
+    ; CHECK: STRXui %copy, %adrp, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store 8 into @x)
+    %copy:gpr(p0) = COPY $x0
+    %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x
+    %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x
+    G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)
```
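To run just the updated test from a built tree, `llvm-lit` can point at the file directly (the `build/` path is an assumption about your build directory layout):
```
build/bin/llvm-lit llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
```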
