aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
authorAmara Emerson <amara@apple.com>2020-09-29 14:39:54 -0700
committerHans Wennborg <hans@chromium.org>2020-09-30 13:05:48 +0200
commita3aee2678d07e249dca18493d2acd898eefd48dd (patch)
tree49ddf31949a4a5e57f0df644066357f57dce9ea3 /llvm
parentdda0a1867cc0c4ace4535f179aec85c3ff8cfa96 (diff)
downloadllvm-a3aee2678d07e249dca18493d2acd898eefd48dd.zip
llvm-a3aee2678d07e249dca18493d2acd898eefd48dd.tar.gz
llvm-a3aee2678d07e249dca18493d2acd898eefd48dd.tar.bz2
[GlobalISel] Fix multiply with overflow intrinsics legalization generating invalid MIR.
During lowering of G_UMULO and friends, the previous code moved the builder's insertion point to be after the legalizing instruction. When that happened, if there happened to be a "G_CONSTANT i32 0" immediately after, the CSEMIRBuilder would try to find that constant during the buildConstant(zero) call, and since it dominates itself would return the iterator unchanged, even though the def of the constant was *after* the current insertion point. This resulted in the compare being generated *before* the constant which it was using. There's no need to modify the insertion point before building the mul-hi or constant. Delaying moving the insert point ensures those are built/CSEd before the G_ICMP is built. Fixes PR47679 Differential Revision: https://reviews.llvm.org/D88514 (cherry picked from commit 1d54e75cf26a4c60b66659d5d9c62f4bb9452b03)
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp5
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir68
-rw-r--r--llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir2
-rw-r--r--llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll12
4 files changed, 76 insertions, 11 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index da519f9..244e7a9 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2368,11 +2368,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.RemoveOperand(1);
Observer.changedInstr(MI);
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
-
auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
auto Zero = MIRBuilder.buildConstant(Ty, 0);
+ // Move insert point forward so we can use the Res register if needed.
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
// For *signed* multiply, overflow is detected by checking:
// (hi != (lo >> bitwidth-1))
if (Opcode == TargetOpcode::G_SMULH) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
index 3260eb6..187ddeb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
@@ -28,8 +28,8 @@ body: |
; CHECK-LABEL: name: test_smul_overflow
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
; CHECK: [[SMULH:%[0-9]+]]:_(s64) = G_SMULH [[COPY]], [[COPY1]]
+ ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MUL]], [[C]]
; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s64), [[ASHR]]
@@ -51,9 +51,9 @@ body: |
; CHECK-LABEL: name: test_umul_overflow
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[COPY]], [[COPY1]]
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]]
; CHECK: $x0 = COPY [[MUL]](s64)
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
@@ -66,3 +66,67 @@ body: |
$w0 = COPY %4(s32)
...
+---
+name: test_umulo_overflow_no_invalid_mir
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+frameInfo:
+ maxAlignment: 16
+stack:
+ - { id: 0, size: 8, alignment: 8 }
+ - { id: 1, size: 8, alignment: 8 }
+ - { id: 2, size: 16, alignment: 16 }
+ - { id: 3, size: 16, alignment: 8 }
+machineFunctionInfo: {}
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2
+ ; Check that the overflow result doesn't generate incorrect MIR by using a G_CONSTANT 0
+ ; before it's been defined.
+ ; CHECK-LABEL: name: test_umulo_overflow_no_invalid_mir
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3
+ ; CHECK: G_STORE [[COPY2]](s64), [[FRAME_INDEX]](p0) :: (store 8)
+ ; CHECK: G_STORE [[COPY1]](s64), [[FRAME_INDEX1]](p0) :: (store 8)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8)
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load 8)
+ ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[LOAD]], [[LOAD1]]
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[LOAD1]]
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]]
+ ; CHECK: G_STORE [[C]](s64), [[FRAME_INDEX2]](p0) :: (store 8, align 1)
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
+ ; CHECK: $x0 = COPY [[MUL]](s64)
+ ; CHECK: $x1 = COPY [[AND]](s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %25:_(s32) = G_CONSTANT i32 0
+ %3:_(p0) = G_FRAME_INDEX %stack.0
+ %4:_(p0) = G_FRAME_INDEX %stack.1
+ %6:_(p0) = G_FRAME_INDEX %stack.3
+ G_STORE %2(s64), %3(p0) :: (store 8)
+ G_STORE %1(s64), %4(p0) :: (store 8)
+ %7:_(s64) = G_LOAD %3(p0) :: (dereferenceable load 8)
+ %8:_(s64) = G_LOAD %4(p0) :: (dereferenceable load 8)
+ %9:_(s64), %10:_(s1) = G_UMULO %7, %8
+ %31:_(s64) = G_CONSTANT i64 0
+ G_STORE %31(s64), %6(p0) :: (store 8, align 1)
+ %16:_(s64) = G_ZEXT %10(s1)
+ $x0 = COPY %9(s64)
+ $x1 = COPY %16(s64)
+ RET_ReallyLR implicit $x0
+
+...
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
index c92a55d..b146aa5 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
@@ -439,9 +439,9 @@ body: |
; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3
- ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]]
; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
index 659eadf..f7250cc 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
@@ -180,13 +180,13 @@ declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag) {
; MIPS32-LABEL: umul_with_overflow:
; MIPS32: # %bb.0:
-; MIPS32-NEXT: mul $1, $4, $5
; MIPS32-NEXT: multu $4, $5
-; MIPS32-NEXT: mfhi $2
-; MIPS32-NEXT: sltu $2, $zero, $2
-; MIPS32-NEXT: andi $2, $2, 1
-; MIPS32-NEXT: sb $2, 0($7)
-; MIPS32-NEXT: sw $1, 0($6)
+; MIPS32-NEXT: mfhi $1
+; MIPS32-NEXT: mul $2, $4, $5
+; MIPS32-NEXT: sltu $1, $zero, $1
+; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: sb $1, 0($7)
+; MIPS32-NEXT: sw $2, 0($6)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
%res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs)