aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Lafreniere <peter@n8pjl.ca>2024-09-03 19:04:23 -0400
committerGitHub <noreply@github.com>2024-09-03 19:04:23 -0400
commitd3c10b51a99d4476261f57ceaa7db60960cd5493 (patch)
treea14fec644dd6a35ad79ed4d9717e6333e22023e8
parent98bb354a0add4aeb614430f48a23f87992166239 (diff)
downloadllvm-d3c10b51a99d4476261f57ceaa7db60960cd5493.zip
llvm-d3c10b51a99d4476261f57ceaa7db60960cd5493.tar.gz
llvm-d3c10b51a99d4476261f57ceaa7db60960cd5493.tar.bz2
[M68k] Introduce more MOVI cases (#98377)
Add three more special cases for loading registers with immediates. The first allows values in the range of [-255, 255] to be loaded with MOVEQ, even if the register is more than 8 bits and the sign extension is unwanted. This is done by loading the bitwise complement of the desired value, then performing a NOT instruction on the loaded register. This special case is only used when a simple MOVEQ cannot be used, and is only used for 32-bit data registers. Address registers cannot support MOVEQ, and the two-instruction sequence is no faster or smaller than a plain MOVE instruction when loading 16-bit immediates on the 68000, and likely slower for more sophisticated microarchitectures. However, the instruction sequence is both smaller and faster than the corresponding MOVE instruction for 32-bit register widths. The second special case is for zeroing address registers. This simply expands to subtracting a register from itself, consuming one instruction word rather than 2-3, with a small improvement in speed as well. The last special case is for assigning sign-extended 16-bit values to a full address register. This takes advantage of the fact that the movea.w instruction sign-extends the output, permitting the immediate to be smaller. This is similar to using lea with a 16-bit address, which is not added in this patch as 16-bit absolute addressing is not yet implemented. This is a v2 submission of #90817. It also creates a 'Data' test directory to better align with the backend's tablegen layout.
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.cpp62
-rw-r--r--llvm/test/CodeGen/M68k/Arith/add.ll3
-rw-r--r--llvm/test/CodeGen/M68k/CConv/fastcc-call.ll4
-rw-r--r--llvm/test/CodeGen/M68k/Data/link-unlnk.ll (renamed from llvm/test/CodeGen/M68k/link-unlnk.ll)0
-rw-r--r--llvm/test/CodeGen/M68k/Data/load-extend.ll (renamed from llvm/test/CodeGen/M68k/load-extend.ll)0
-rw-r--r--llvm/test/CodeGen/M68k/Data/load-imm.ll149
-rw-r--r--llvm/test/CodeGen/M68k/Data/sext-i1.ll (renamed from llvm/test/CodeGen/M68k/Arith/sext-i1.ll)0
7 files changed, 208 insertions, 10 deletions
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
index 23c5c76..2d9285f 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
@@ -346,8 +346,8 @@ void M68kInstrInfo::AddZExt(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(And), Reg).addReg(Reg).addImm(Mask);
}
-// Convert MOVI to MOVQ if the target is a data register and the immediate
-// fits in a sign-extended i8, otherwise emit a plain MOV.
+// Convert MOVI to the appropriate instruction (sequence) for setting
+// the register to an immediate value.
bool M68kInstrInfo::ExpandMOVI(MachineInstrBuilder &MIB, MVT MVTSize) const {
Register Reg = MIB->getOperand(0).getReg();
int64_t Imm = MIB->getOperand(1).getImm();
@@ -360,18 +360,66 @@ bool M68kInstrInfo::ExpandMOVI(MachineInstrBuilder &MIB, MVT MVTSize) const {
if (AR16->contains(Reg) || AR32->contains(Reg))
IsAddressReg = true;
+ // We need to assign to the full register to make IV happy
+ Register SReg =
+ MVTSize == MVT::i32
+ ? Reg
+ : Register(RI.getMatchingMegaReg(Reg, IsAddressReg ? AR32 : DR32));
+ assert(SReg && "No viable MEGA register available");
+
LLVM_DEBUG(dbgs() << "Expand " << *MIB.getInstr() << " to ");
+ // Sign extension doesn't matter if we only use the bottom 8 bits
if (MVTSize == MVT::i8 || (!IsAddressReg && Imm >= -128 && Imm <= 127)) {
LLVM_DEBUG(dbgs() << "MOVEQ\n");
- // We need to assign to the full register to make IV happy
- Register SReg =
- MVTSize == MVT::i32 ? Reg : Register(RI.getMatchingMegaReg(Reg, DR32));
- assert(SReg && "No viable MEGA register available");
-
MIB->setDesc(get(M68k::MOVQ));
MIB->getOperand(0).setReg(SReg);
+
+ // Counter the effects of sign-extension with a bitwise not.
+ // This is only faster and smaller for 32 bit values.
+ } else if (DR32->contains(Reg) && isUInt<8>(Imm)) {
+ LLVM_DEBUG(dbgs() << "MOVEQ and NOT\n");
+
+ MachineBasicBlock &MBB = *MIB->getParent();
+ DebugLoc DL = MIB->getDebugLoc();
+
+ unsigned SubReg = RI.getSubReg(Reg, M68k::MxSubRegIndex8Lo);
+ assert(SubReg && "No viable SUB register available");
+
+ BuildMI(MBB, MIB.getInstr(), DL, get(M68k::MOVQ), SReg).addImm(~Imm & 0xFF);
+ BuildMI(MBB, MIB.getInstr(), DL, get(M68k::NOT8d), SubReg).addReg(SubReg);
+
+ MIB->removeFromParent();
+
+ // Special case for setting address register to NULL (0)
+ } else if (IsAddressReg && Imm == 0) {
+ LLVM_DEBUG(dbgs() << "SUBA\n");
+
+ MachineBasicBlock &MBB = *MIB->getParent();
+ DebugLoc DL = MIB->getDebugLoc();
+
+ BuildMI(MBB, MIB.getInstr(), DL, get(M68k::SUB32ar), SReg)
+ .addReg(SReg, RegState::Undef)
+ .addReg(SReg, RegState::Undef);
+
+ MIB->removeFromParent();
+
+ // movea.w implicitly sign extends to the full register width,
+ // so exploit that if the immediate fits in the correct range.
+ //
+ // TODO: use lea imm.w, %an for further constants when 16-bit
+ // absolute addressing is implemented.
+ } else if (AR32->contains(Reg) && isUInt<16>(Imm)) {
+ LLVM_DEBUG(dbgs() << "MOVEA w/ implicit extend\n");
+
+ unsigned SubReg = RI.getSubReg(Reg, M68k::MxSubRegIndex16Lo);
+ assert(SubReg && "No viable SUB register available");
+
+ MIB->setDesc(get(M68k::MOV16ai));
+ MIB->getOperand(0).setReg(SubReg);
+
+ // Fall back to a move with immediate
} else {
LLVM_DEBUG(dbgs() << "MOVE\n");
MIB->setDesc(get(MVTSize == MVT::i16 ? M68k::MOV16ri : M68k::MOV32ri));
diff --git a/llvm/test/CodeGen/M68k/Arith/add.ll b/llvm/test/CodeGen/M68k/Arith/add.ll
index a9eb0bb8..417fe8f 100644
--- a/llvm/test/CodeGen/M68k/Arith/add.ll
+++ b/llvm/test/CodeGen/M68k/Arith/add.ll
@@ -65,7 +65,8 @@ define fastcc void @test4(ptr inreg %a) nounwind {
; CHECK-NEXT: movem.l %d2, (0,%sp) ; 8-byte Folded Spill
; CHECK-NEXT: move.l (%a0), %d0
; CHECK-NEXT: moveq #0, %d1
-; CHECK-NEXT: move.l #128, %d2
+; CHECK-NEXT: moveq #127, %d2
+; CHECK-NEXT: not.b %d2
; CHECK-NEXT: add.l (4,%a0), %d2
; CHECK-NEXT: addx.l %d0, %d1
; CHECK-NEXT: move.l %d2, (4,%a0)
diff --git a/llvm/test/CodeGen/M68k/CConv/fastcc-call.ll b/llvm/test/CodeGen/M68k/CConv/fastcc-call.ll
index 8d40ebd..f67280c 100644
--- a/llvm/test/CodeGen/M68k/CConv/fastcc-call.ll
+++ b/llvm/test/CodeGen/M68k/CConv/fastcc-call.ll
@@ -13,8 +13,8 @@ define i32 @foo1() nounwind uwtable {
; CHECK-NEXT: move.l #5, (%sp)
; CHECK-NEXT: moveq #1, %d0
; CHECK-NEXT: moveq #2, %d1
-; CHECK-NEXT: move.l #3, %a0
-; CHECK-NEXT: move.l #4, %a1
+; CHECK-NEXT: move.w #3, %a0
+; CHECK-NEXT: move.w #4, %a1
; CHECK-NEXT: jsr (bar1@PLT,%pc)
; CHECK-NEXT: moveq #0, %d0
; CHECK-NEXT: adda.l #4, %sp
diff --git a/llvm/test/CodeGen/M68k/link-unlnk.ll b/llvm/test/CodeGen/M68k/Data/link-unlnk.ll
index fe39a9a..fe39a9a 100644
--- a/llvm/test/CodeGen/M68k/link-unlnk.ll
+++ b/llvm/test/CodeGen/M68k/Data/link-unlnk.ll
diff --git a/llvm/test/CodeGen/M68k/load-extend.ll b/llvm/test/CodeGen/M68k/Data/load-extend.ll
index 5115973..5115973 100644
--- a/llvm/test/CodeGen/M68k/load-extend.ll
+++ b/llvm/test/CodeGen/M68k/Data/load-extend.ll
diff --git a/llvm/test/CodeGen/M68k/Data/load-imm.ll b/llvm/test/CodeGen/M68k/Data/load-imm.ll
new file mode 100644
index 0000000..5105e55
--- /dev/null
+++ b/llvm/test/CodeGen/M68k/Data/load-imm.ll
@@ -0,0 +1,149 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=m68k-linux -verify-machineinstrs | FileCheck %s
+
+define i1 @return_true() {
+; CHECK-LABEL: return_true:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: moveq #1, %d0
+; CHECK-NEXT: rts
+ ret i1 true
+}
+
+define i8 @return_0_i8() {
+; CHECK-LABEL: return_0_i8:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: moveq #0, %d0
+; CHECK-NEXT: rts
+ ret i8 0
+}
+
+define i16 @return_0_i16() {
+; CHECK-LABEL: return_0_i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: moveq #0, %d0
+; CHECK-NEXT: rts
+ ret i16 0
+}
+
+define i32 @return_0_i32() {
+; CHECK-LABEL: return_0_i32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: moveq #0, %d0
+; CHECK-NEXT: rts
+ ret i32 0
+}
+
+define i64 @return_0_i64() {
+; CHECK-LABEL: return_0_i64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: moveq #0, %d0
+; CHECK-NEXT: move.l %d0, %d1
+; CHECK-NEXT: rts
+ ret i64 0
+}
+
+define i16 @return_neg1_i16() {
+; CHECK-LABEL: return_neg1_i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: moveq #-1, %d0
+; CHECK-NEXT: rts
+ ret i16 -1
+}
+
+define i32 @return_neg1_i32() {
+; CHECK-LABEL: return_neg1_i32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: moveq #-1, %d0
+; CHECK-NEXT: rts
+ ret i32 -1
+}
+
+define i8 @return_160_i8() {
+; CHECK-LABEL: return_160_i8:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: moveq #-96, %d0
+; CHECK-NEXT: rts
+ ret i8 160
+}
+
+define i16 @return_160_i16() {
+; CHECK-LABEL: return_160_i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: move.w #160, %d0
+; CHECK-NEXT: rts
+ ret i16 160
+}
+
+define i32 @return_160_i32() {
+; CHECK-LABEL: return_160_i32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: moveq #95, %d0
+; CHECK-NEXT: not.b %d0
+; CHECK-NEXT: rts
+ ret i32 160
+}
+
+define i16 @return_14281_i16() {
+; CHECK-LABEL: return_14281_i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: move.w #14281, %d0
+; CHECK-NEXT: rts
+ ret i16 14281
+}
+
+define i32 @return_14281_i32() {
+; CHECK-LABEL: return_14281_i32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: move.l #14281, %d0
+; CHECK-NEXT: rts
+ ret i32 14281
+}
+
+define i64 @return_14281_i64() {
+; CHECK-LABEL: return_14281_i64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: moveq #0, %d0
+; CHECK-NEXT: move.l #14281, %d1
+; CHECK-NEXT: rts
+ ret i64 14281
+}
+
+define ptr @return_null() {
+; CHECK-LABEL: return_null:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: suba.l %a0, %a0
+; CHECK-NEXT: rts
+ ret ptr null
+}
+
+define ptr @return_nonnull() {
+; CHECK-LABEL: return_nonnull:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: move.w #200, %a0
+; CHECK-NEXT: rts
+ ret ptr inttoptr (i32 200 to ptr)
+}
+
+define ptr @return_large_nonnull() {
+; CHECK-LABEL: return_large_nonnull:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: move.l #74281, %a0
+; CHECK-NEXT: rts
+ ret ptr inttoptr (i32 74281 to ptr)
+} \ No newline at end of file
diff --git a/llvm/test/CodeGen/M68k/Arith/sext-i1.ll b/llvm/test/CodeGen/M68k/Data/sext-i1.ll
index 2be5224..2be5224 100644
--- a/llvm/test/CodeGen/M68k/Arith/sext-i1.ll
+++ b/llvm/test/CodeGen/M68k/Data/sext-i1.ll