author     Amara Emerson <amara@apple.com>    2020-09-29 14:39:54 -0700
committer  Hans Wennborg <hans@chromium.org>  2020-09-30 13:05:48 +0200
commit     a3aee2678d07e249dca18493d2acd898eefd48dd
tree       49ddf31949a4a5e57f0df644066357f57dce9ea3 /llvm
parent     dda0a1867cc0c4ace4535f179aec85c3ff8cfa96
[GlobalISel] Fix multiply with overflow intrinsics legalization generating invalid MIR.
During lowering of G_UMULO and friends, the previous code moved the builder's
insertion point to be after the instruction being legalized. With the insertion
point there, if a "G_CONSTANT i32 0" immediately followed, the CSEMIRBuilder
would find that constant during the buildConstant(zero) call and, since it
dominates itself, return it unchanged, even though the def of the constant was
*after* the current insertion point. This resulted in the compare being
generated *before* the constant it was using.
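Illustratively, the broken output contained a use of a constant ahead of its
def, something like this MIR sketch (register names here are hypothetical,
not taken from the commit):

    %overflow:_(s32) = G_ICMP intpred(ne), %hi(s64), %zero ; use of %zero...
    %zero:_(s64) = G_CONSTANT i64 0                        ; ...whose def only appears here, after the use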
There's no need to modify the insertion point before building the mul-hi or
the constant. Delaying the insert-point move ensures both are built/CSE'd
before the G_ICMP is built.
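With that reordering, the legalized G_UMULO sequence comes out in a valid
order, matching the updated CHECK lines in the tests below (register names
again illustrative):

    %hi:_(s64) = G_UMULH %lhs, %rhs
    %zero:_(s64) = G_CONSTANT i64 0
    %mul:_(s64) = G_MUL %lhs, %rhs
    %overflow:_(s32) = G_ICMP intpred(ne), %hi(s64), %zero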
Fixes PR47679
Differential Revision: https://reviews.llvm.org/D88514
(cherry picked from commit 1d54e75cf26a4c60b66659d5d9c62f4bb9452b03)
Diffstat (limited to 'llvm')

 llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp       |  5 +-
 llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir | 68 +++++++++++++++++++-
 llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir   |  2 +-
 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll      | 12 ++++++------
 4 files changed, 76 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index da519f9..244e7a9 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2368,11 +2368,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     MI.RemoveOperand(1);
     Observer.changedInstr(MI);
 
-    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
-
     auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
     auto Zero = MIRBuilder.buildConstant(Ty, 0);
 
+    // Move insert point forward so we can use the Res register if needed.
+    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
     // For *signed* multiply, overflow is detected by checking:
     // (hi != (lo >> bitwidth-1))
     if (Opcode == TargetOpcode::G_SMULH) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
index 3260eb6..187ddeb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
@@ -28,8 +28,8 @@ body: |
     ; CHECK-LABEL: name: test_smul_overflow
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
     ; CHECK: [[SMULH:%[0-9]+]]:_(s64) = G_SMULH [[COPY]], [[COPY1]]
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
     ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MUL]], [[C]]
     ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s64), [[ASHR]]
@@ -51,9 +51,9 @@ body: |
     ; CHECK-LABEL: name: test_umul_overflow
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
     ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[COPY]], [[COPY1]]
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
     ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]]
     ; CHECK: $x0 = COPY [[MUL]](s64)
     ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
@@ -66,3 +66,67 @@ body: |
     $w0 = COPY %4(s32)
 
 ...
+---
+name:            test_umulo_overflow_no_invalid_mir
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+  - { reg: '$x2' }
+frameInfo:
+  maxAlignment:    16
+stack:
+  - { id: 0, size: 8, alignment: 8 }
+  - { id: 1, size: 8, alignment: 8 }
+  - { id: 2, size: 16, alignment: 16 }
+  - { id: 3, size: 16, alignment: 8 }
+machineFunctionInfo: {}
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; Check that the overflow result doesn't generate incorrect MIR by using a G_CONSTANT 0
+    ; before it's been defined.
+    ; CHECK-LABEL: name: test_umulo_overflow_no_invalid_mir
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+    ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+    ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3
+    ; CHECK: G_STORE [[COPY2]](s64), [[FRAME_INDEX]](p0) :: (store 8)
+    ; CHECK: G_STORE [[COPY1]](s64), [[FRAME_INDEX1]](p0) :: (store 8)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load 8)
+    ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[LOAD]], [[LOAD1]]
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[LOAD1]]
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]]
+    ; CHECK: G_STORE [[C]](s64), [[FRAME_INDEX2]](p0) :: (store 8, align 1)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
+    ; CHECK: $x0 = COPY [[MUL]](s64)
+    ; CHECK: $x1 = COPY [[AND]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(p0) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %25:_(s32) = G_CONSTANT i32 0
+    %3:_(p0) = G_FRAME_INDEX %stack.0
+    %4:_(p0) = G_FRAME_INDEX %stack.1
+    %6:_(p0) = G_FRAME_INDEX %stack.3
+    G_STORE %2(s64), %3(p0) :: (store 8)
+    G_STORE %1(s64), %4(p0) :: (store 8)
+    %7:_(s64) = G_LOAD %3(p0) :: (dereferenceable load 8)
+    %8:_(s64) = G_LOAD %4(p0) :: (dereferenceable load 8)
+    %9:_(s64), %10:_(s1) = G_UMULO %7, %8
+    %31:_(s64) = G_CONSTANT i64 0
+    G_STORE %31(s64), %6(p0) :: (store 8, align 1)
+    %16:_(s64) = G_ZEXT %10(s1)
+    $x0 = COPY %9(s64)
+    $x1 = COPY %16(s64)
+    RET_ReallyLR implicit $x0
+
+...
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
index c92a55d..b146aa5 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
@@ -439,9 +439,9 @@ body: |
     ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
     ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
     ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3
-    ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
     ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]]
     ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
     ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
     ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
index 659eadf..f7250cc 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
@@ -180,13 +180,13 @@ declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
 define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag) {
 ; MIPS32-LABEL: umul_with_overflow:
 ; MIPS32:       # %bb.0:
-; MIPS32-NEXT:    mul $1, $4, $5
 ; MIPS32-NEXT:    multu $4, $5
-; MIPS32-NEXT:    mfhi $2
-; MIPS32-NEXT:    sltu $2, $zero, $2
-; MIPS32-NEXT:    andi $2, $2, 1
-; MIPS32-NEXT:    sb $2, 0($7)
-; MIPS32-NEXT:    sw $1, 0($6)
+; MIPS32-NEXT:    mfhi $1
+; MIPS32-NEXT:    mul $2, $4, $5
+; MIPS32-NEXT:    sltu $1, $zero, $1
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    sb $1, 0($7)
+; MIPS32-NEXT:    sw $2, 0($6)
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs)