diff options
author | Hans Wennborg <hans@hanshq.net> | 2018-09-10 08:11:26 +0000 |
---|---|---|
committer | Hans Wennborg <hans@hanshq.net> | 2018-09-10 08:11:26 +0000 |
commit | 3472e2a5a51ff45ddf7a62dec3ce7a0451f04fa3 (patch) | |
tree | 4ce0f6dc90683e99ed49613df2f8bf3152ee0b59 | |
parent | 3065b0a70b0baa3af5b32df80c7cd216a4fd02a2 (diff) | |
download | llvm-3472e2a5a51ff45ddf7a62dec3ce7a0451f04fa3.zip llvm-3472e2a5a51ff45ddf7a62dec3ce7a0451f04fa3.tar.gz llvm-3472e2a5a51ff45ddf7a62dec3ce7a0451f04fa3.tar.bz2 |
Merging r341642:
------------------------------------------------------------------------
r341642 | tnorthover | 2018-09-07 11:21:25 +0200 (Fri, 07 Sep 2018) | 8 lines
ARM: fix Thumb2 CodeGen for ldrex with folded frame-index.
Because t2LDREX (& t2STREX) were marked as AddrModeNone, but did allow a
FrameIndex operand, rewriteT2FrameIndex asserted. This gives them a
proper addressing-mode and tells the rewriter about it so that encodable
offsets are exploited and others are rejected.
Should fix PR38828.
------------------------------------------------------------------------
llvm-svn: 341783
-rw-r--r-- | llvm/lib/Target/ARM/ARMFrameLowering.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrFormats.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrThumb2.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ldrex-frame-size.ll | 36 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/ldstrex.ll | 85 |
7 files changed, 133 insertions, 3 deletions
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index a8c7570..56ad7a0 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1514,6 +1514,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, break; case ARMII::AddrMode5: case ARMII::AddrModeT2_i8s4: + case ARMII::AddrModeT2_ldrex: Limit = std::min(Limit, ((1U << 8) - 1) * 4); break; case ARMII::AddrModeT2_i12: diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td index 70aded2..1d3b141 100644 --- a/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -109,6 +109,7 @@ def AddrModeT2_pc : AddrMode<14>; def AddrModeT2_i8s4 : AddrMode<15>; def AddrMode_i12 : AddrMode<16>; def AddrMode5FP16 : AddrMode<17>; +def AddrModeT2_ldrex : AddrMode<18>; // Load / store index mode. class IndexMode<bits<2> val> { diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index c7133b6..f67075f 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -3267,7 +3267,7 @@ def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>, Requires<[IsThumb, HasV8MBaseline]>; def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr), - AddrModeNone, 4, NoItinerary, + AddrModeT2_ldrex, 4, NoItinerary, "ldrex", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>, Requires<[IsThumb, HasV8MBaseline]> { @@ -3346,7 +3346,7 @@ def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd), def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_imm0_1020s4:$addr), - AddrModeNone, 4, NoItinerary, + AddrModeT2_ldrex, 4, NoItinerary, "strex", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]>, diff --git 
a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index b918006..beeb5de 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -201,7 +201,8 @@ namespace ARMII { AddrModeT2_pc = 14, // +/- i12 for pc relative data AddrModeT2_i8s4 = 15, // i8 * 4 AddrMode_i12 = 16, - AddrMode5FP16 = 17 // i8 * 2 + AddrMode5FP16 = 17, // i8 * 2 + AddrModeT2_ldrex = 18, // i8 * 4, with unscaled offset in MCInst }; inline static const char *AddrModeToString(AddrMode addrmode) { @@ -224,6 +225,7 @@ namespace ARMII { case AddrModeT2_pc: return "AddrModeT2_pc"; case AddrModeT2_i8s4: return "AddrModeT2_i8s4"; case AddrMode_i12: return "AddrMode_i12"; + case AddrModeT2_ldrex:return "AddrModeT2_ldrex"; } } diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index d5f0ba9..1a91a70 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -621,6 +621,11 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // MCInst operand expects already scaled value. Scale = 1; assert((Offset & 3) == 0 && "Can't encode this offset!"); + } else if (AddrMode == ARMII::AddrModeT2_ldrex) { + Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4; + NumBits = 8; // 8 bits scaled by 4 + Scale = 4; + assert((Offset & 3) == 0 && "Can't encode this offset!"); } else { llvm_unreachable("Unsupported addressing mode!"); } diff --git a/llvm/test/CodeGen/ARM/ldrex-frame-size.ll b/llvm/test/CodeGen/ARM/ldrex-frame-size.ll new file mode 100644 index 0000000..5955405 --- /dev/null +++ b/llvm/test/CodeGen/ARM/ldrex-frame-size.ll @@ -0,0 +1,36 @@ +; RUN: llc -mtriple=thumbv7-linux-gnueabi -o - %s | FileCheck %s + +; This alloca is just large enough that FrameLowering decides it needs a frame +; to guarantee access, based on the range of ldrex. 
+ +; The actual alloca size is a bit of black magic, unfortunately: the real +; maximum accessible is 1020, but FrameLowering adds 16 bytes to its estimated +; stack size just because so the alloca is not actually what the limit gets +; compared to. The important point is that we don't go up to ~4096, which is the +; default with no strange instructions. +define void @test_large_frame() { +; CHECK-LABEL: test_large_frame: +; CHECK: push +; CHECK: sub.w sp, sp, #1004 + + %ptr = alloca i32, i32 251 + + %addr = getelementptr i32, i32* %ptr, i32 1 + call i32 @llvm.arm.ldrex.p0i32(i32* %addr) + ret void +} + +; This alloca is just the other side of the limit, so no frame +define void @test_small_frame() { +; CHECK-LABEL: test_small_frame: +; CHECK-NOT: push +; CHECK: sub.w sp, sp, #1000 + + %ptr = alloca i32, i32 250 + + %addr = getelementptr i32, i32* %ptr, i32 1 + call i32 @llvm.arm.ldrex.p0i32(i32* %addr) + ret void +} + +declare i32 @llvm.arm.ldrex.p0i32(i32*) diff --git a/llvm/test/CodeGen/ARM/ldstrex.ll b/llvm/test/CodeGen/ARM/ldstrex.ll index 59349f7..73afa0e 100644 --- a/llvm/test/CodeGen/ARM/ldstrex.ll +++ b/llvm/test/CodeGen/ARM/ldstrex.ll @@ -142,6 +142,91 @@ define void @excl_addrmode() { ret void } +define void @test_excl_addrmode_folded() { +; CHECK-LABEL: test_excl_addrmode_folded: + %local = alloca i8, i32 4096 + + %local.0 = getelementptr i8, i8* %local, i32 4 + %local32.0 = bitcast i8* %local.0 to i32* + call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0) + call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0) +; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [sp, #4] +; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [sp, #4] + + %local.1 = getelementptr i8, i8* %local, i32 1020 + %local32.1 = bitcast i8* %local.1 to i32* + call i32 @llvm.arm.ldrex.p0i32(i32* %local32.1) + call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.1) +; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [sp, #1020] +; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [sp, #1020] + + ret 
void +} + +define void @test_excl_addrmode_range() { +; CHECK-LABEL: test_excl_addrmode_range: + %local = alloca i8, i32 4096 + + %local.0 = getelementptr i8, i8* %local, i32 1024 + %local32.0 = bitcast i8* %local.0 to i32* + call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0) + call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0) +; CHECK-T2ADDRMODE: mov r[[TMP:[0-9]+]], sp +; CHECK-T2ADDRMODE: add.w r[[ADDR:[0-9]+]], r[[TMP]], #1024 +; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]] +; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] + + ret void +} + +define void @test_excl_addrmode_align() { +; CHECK-LABEL: test_excl_addrmode_align: + %local = alloca i8, i32 4096 + + %local.0 = getelementptr i8, i8* %local, i32 2 + %local32.0 = bitcast i8* %local.0 to i32* + call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0) + call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0) +; CHECK-T2ADDRMODE: mov r[[ADDR:[0-9]+]], sp +; CHECK-T2ADDRMODE: adds r[[ADDR:[0-9]+]], #2 +; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]] +; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] + + ret void +} + +define void @test_excl_addrmode_sign() { +; CHECK-LABEL: test_excl_addrmode_sign: + %local = alloca i8, i32 4096 + + %local.0 = getelementptr i8, i8* %local, i32 -4 + %local32.0 = bitcast i8* %local.0 to i32* + call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0) + call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0) +; CHECK-T2ADDRMODE: mov r[[ADDR:[0-9]+]], sp +; CHECK-T2ADDRMODE: subs r[[ADDR:[0-9]+]], #4 +; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]] +; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] + + ret void +} + +define void @test_excl_addrmode_combination() { +; CHECK-LABEL: test_excl_addrmode_combination: + %local = alloca i8, i32 4096 + %unused = alloca i8, i32 64 + + %local.0 = getelementptr i8, i8* %local, i32 4 + %local32.0 = bitcast i8* %local.0 to i32* + call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0) + call i32 
@llvm.arm.strex.p0i32(i32 0, i32* %local32.0) +; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [sp, #68] +; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [sp, #68] + + ret void +} + + ; LLVM should know, even across basic blocks, that ldrex is setting the high ; bits of its i32 to 0. There should be no zero-extend operation. define zeroext i8 @test_cross_block_zext_i8(i1 %tst, i8* %addr) { |