diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2020-02-26 23:13:59 -0500 |
---|---|---|
committer | Matt Arsenault <arsenm2@gmail.com> | 2024-07-02 17:12:54 +0200 |
commit | 26052805305f1f9dcbf3ab4102eb6b2372eb8dce (patch) | |
tree | 9fb6b343d00bcd1006b4c4c0f2b73eb46072e28c | |
parent | e10d4e81ac6a2136f203252f765a1421a8ddb798 (diff) | |
download | llvm-users/arsenm/global-isel-use-uadde-narrow-umulh.zip llvm-users/arsenm/global-isel-use-uadde-narrow-umulh.tar.gz llvm-users/arsenm/global-isel-use-uadde-narrow-umulh.tar.bz2 |
GlobalISel: Use G_UADDE when narrowing G_UMULHusers/arsenm/global-isel-use-uadde-narrow-umulh
This greatly shrinks the AMDGPU div64 expansion.
Instead of adding a zext of the condition output, add a zero and use
the carry in to G_UADDE. This is closer to how the DAG expansion using
umulh does it, and it seems more natural to leave the boolean output
as a boolean input. We should have a combine to form G_UADDE from this
pattern, but the legalizer shouldn't create extra work for the
combiner if it can help it.
The Mips cases are regressions, but the DAG lowering for muli128 seems
to not use the expansion involving MULHU/MULHS at all. The DAG output
is radically different than GlobalISel as-is, so it seems like Mips
should be using a different legalization strategy here to begin with.
The RISCV legalizer tests look worse for the mul i96 case, but those
didn't exist when I wrote this patch and forgot about it 4 years ago,
so I haven't really looked into why. We've entered the age where most tests
should just be using IR, so I don't know if this matters or not (the IR mul
test doesn't seem to cover i96)
19 files changed, 10181 insertions, 12360 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 86de1f3..6a0175c 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -5719,6 +5719,7 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs, ArrayRef<Register> Src1Regs, ArrayRef<Register> Src2Regs, LLT NarrowTy) { + const LLT S1 = LLT::scalar(1); MachineIRBuilder &B = MIRBuilder; unsigned SrcParts = Src1Regs.size(); unsigned DstParts = DstRegs.size(); @@ -5731,6 +5732,8 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs, unsigned CarrySumPrevDstIdx; SmallVector<Register, 4> Factors; + const Register Zero = B.buildConstant(NarrowTy, 0).getReg(0); + for (DstIdx = 1; DstIdx < DstParts; DstIdx++) { // Collect low parts of muls for DstIdx. for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1; @@ -5755,15 +5758,15 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs, // Add all factors and accumulate all carries into CarrySum. if (DstIdx != DstParts - 1) { MachineInstrBuilder Uaddo = - B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]); + B.buildUAddo(NarrowTy, S1, Factors[0], Factors[1]); FactorSum = Uaddo.getReg(0); - CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0); + CarrySum = Zero; for (unsigned i = 2; i < Factors.size(); ++i) { - MachineInstrBuilder Uaddo = - B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]); - FactorSum = Uaddo.getReg(0); - MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1)); - CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0); + auto Uadde = + B.buildUAdde(NarrowTy, S1, FactorSum, Factors[i], Uaddo.getReg(1)); + FactorSum = Uadde.getReg(0); + CarrySum = B.buildUAdde(NarrowTy, S1, CarrySum, Zero, Uadde.getReg(1)) + .getReg(0); } } else { // Since value for the next index is not calculated, neither is CarrySum. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll b/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll index 20827e6..5d8546c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll @@ -68,26 +68,25 @@ define i128 @mul_i128(i128 %x, i128 %y) { define i160 @mul_i160(i160 %x, i160 %y) { ; CHECK-LABEL: mul_i160: ; CHECK: // %bb.0: -; CHECK-NEXT: mul x9, x1, x4 -; CHECK-NEXT: mul x10, x1, x5 -; CHECK-NEXT: mul x11, x0, x5 -; CHECK-NEXT: umulh x12, x0, x4 -; CHECK-NEXT: madd x10, x2, x4, x10 -; CHECK-NEXT: adds x9, x9, x11 -; CHECK-NEXT: umulh x13, x1, x4 +; CHECK-NEXT: mul x8, x1, x4 +; CHECK-NEXT: mul x9, x0, x5 +; CHECK-NEXT: umulh x10, x0, x4 +; CHECK-NEXT: mul x11, x2, x4 +; CHECK-NEXT: adds x8, x8, x9 +; CHECK-NEXT: mul x12, x1, x5 +; CHECK-NEXT: mul x13, x0, x6 +; CHECK-NEXT: umulh x14, x1, x4 +; CHECK-NEXT: adcs x1, x8, x10 +; CHECK-NEXT: adc x9, xzr, xzr +; CHECK-NEXT: adds x10, x11, x12 +; CHECK-NEXT: umulh x8, x0, x5 ; CHECK-NEXT: cset w11, hs -; CHECK-NEXT: adds x1, x9, x12 -; CHECK-NEXT: and x11, x11, #0x1 -; CHECK-NEXT: umulh x14, x0, x5 -; CHECK-NEXT: mul x8, x0, x4 -; CHECK-NEXT: madd x9, x0, x6, x10 -; CHECK-NEXT: cset w10, hs -; CHECK-NEXT: and x10, x10, #0x1 -; CHECK-NEXT: add x10, x11, x10 -; CHECK-NEXT: add x11, x13, x14 -; CHECK-NEXT: add x10, x11, x10 -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: add x2, x9, x10 +; CHECK-NEXT: adc x10, x10, x13 +; CHECK-NEXT: cmp w11, #1 +; CHECK-NEXT: mul x0, x0, x4 +; CHECK-NEXT: adc x10, x10, x14 +; CHECK-NEXT: adc x8, x10, x8 +; CHECK-NEXT: adc x2, x8, x9 ; CHECK-NEXT: ret %mul = mul i160 %x, %y ret i160 %mul @@ -96,26 +95,25 @@ define i160 @mul_i160(i160 %x, i160 %y) { define i192 @mul_i192(i192 %x, i192 %y) { ; CHECK-LABEL: mul_i192: ; CHECK: // %bb.0: -; CHECK-NEXT: mul x9, x1, x4 -; CHECK-NEXT: mul x10, x1, x5 -; CHECK-NEXT: mul x11, x0, x5 -; CHECK-NEXT: umulh x12, x0, x4 -; CHECK-NEXT: madd x10, x2, x4, x10 -; CHECK-NEXT: adds x9, x9, x11 -; CHECK-NEXT: umulh x13, x1, x4 +; CHECK-NEXT: mul x8, x1, x4 +; CHECK-NEXT: mul x9, x0, x5 +; CHECK-NEXT: umulh x10, x0, x4 +; CHECK-NEXT: mul x11, x2, x4 +; CHECK-NEXT: adds x8, x8, x9 +; CHECK-NEXT: mul x12, x1, x5 +; CHECK-NEXT: mul x13, x0, x6 +; CHECK-NEXT: umulh x14, x1, x4 +; CHECK-NEXT: adcs x1, x8, x10 +; CHECK-NEXT: adc x9, xzr, xzr +; CHECK-NEXT: adds x10, x11, x12 +; CHECK-NEXT: umulh x8, x0, x5 ; CHECK-NEXT: cset w11, hs -; CHECK-NEXT: adds x1, x9, x12 -; CHECK-NEXT: and x11, x11, #0x1 -; CHECK-NEXT: umulh x14, x0, x5 -; CHECK-NEXT: mul x8, x0, x4 -; CHECK-NEXT: madd x9, x0, x6, x10 -; CHECK-NEXT: cset w10, hs -; CHECK-NEXT: and x10, x10, #0x1 -; CHECK-NEXT: add x10, x11, x10 -; CHECK-NEXT: add x11, x13, x14 -; CHECK-NEXT: add x10, x11, x10 -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: add x2, x9, x10 +; CHECK-NEXT: adc x10, x10, x13 +; CHECK-NEXT: cmp w11, #1 +; CHECK-NEXT: mul x0, x0, x4 +; CHECK-NEXT: adc x10, x10, x14 +; CHECK-NEXT: adc x8, x10, x8 +; CHECK-NEXT: adc x2, x8, x9 ; CHECK-NEXT: ret %mul = mul i192 %x, %y ret i192 %mul @@ -127,50 +125,40 @@ define i224 @mul_i224(i224 %x, i224 %y) { ; CHECK-NEXT: mul x8, x1, x4 ; CHECK-NEXT: mul x9, x0, x5 ; CHECK-NEXT: umulh x10, x0, x4 -; CHECK-NEXT: mul x14, x2, x5 -; CHECK-NEXT: adds x8, x8, x9 ; CHECK-NEXT: mul x11, x2, x4 -; CHECK-NEXT: cset w9, hs -; CHECK-NEXT: adds x8, x8, x10 -; CHECK-NEXT: and x9, x9, #0x1 +; CHECK-NEXT: adds x8, x8, x9 ; CHECK-NEXT: mul x12, x1, x5 -; CHECK-NEXT: cset w10, hs -; CHECK-NEXT: and x10, x10, #0x1 +; CHECK-NEXT: adcs x8, x8, x10 +; CHECK-NEXT: mul x14, x2, x5 +; CHECK-NEXT: adc x10, xzr, xzr ; CHECK-NEXT: mul x13, x0, x6 -; CHECK-NEXT: add x9, x9, x10 -; CHECK-NEXT: umulh x15, x1, x4 ; CHECK-NEXT: adds x11, x11, x12 +; CHECK-NEXT: umulh x15, x1, x4 ; CHECK-NEXT: madd x14, x3, x4, x14 ; CHECK-NEXT: umulh x16, x0, x5 ; CHECK-NEXT: madd x12, x1, x6, x14 ; CHECK-NEXT: cset w14, hs -; CHECK-NEXT: adds x11, x11, x13 -; CHECK-NEXT: cset w13, hs -; CHECK-NEXT: adds x11, x11, x15 -; CHECK-NEXT: and x14, x14, #0x1 +; CHECK-NEXT: adcs x11, x11, x13 +; CHECK-NEXT: adc x13, xzr, xzr +; CHECK-NEXT: cmp w14, #1 ; CHECK-NEXT: umulh x17, x2, x4 -; CHECK-NEXT: and x13, x13, #0x1 -; CHECK-NEXT: add x13, x14, x13 -; CHECK-NEXT: umulh x18, x1, x5 -; CHECK-NEXT: cset w1, hs -; CHECK-NEXT: adds x11, x11, x16 -; CHECK-NEXT: and x14, x1, #0x1 +; CHECK-NEXT: adcs x11, x11, x15 +; CHECK-NEXT: adc x13, x13, xzr +; CHECK-NEXT: cmp w14, #1 +; CHECK-NEXT: umulh x9, x1, x5 +; CHECK-NEXT: adcs x11, x11, x16 ; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: umulh x15, x0, x6 -; CHECK-NEXT: add x10, x13, x14 -; CHECK-NEXT: cset w13, hs -; CHECK-NEXT: adds x2, x11, x9 -; CHECK-NEXT: and x9, x13, #0x1 +; CHECK-NEXT: adc x13, x13, xzr +; CHECK-NEXT: cmp w14, #1 +; CHECK-NEXT: umulh x18, x0, x6 +; CHECK-NEXT: adcs x2, x11, x10 +; CHECK-NEXT: adc x10, x13, xzr ; CHECK-NEXT: madd x12, x0, x7, x12 -; CHECK-NEXT: cset w11, hs -; CHECK-NEXT: add x9, x10, x9 -; CHECK-NEXT: add x13, x17, x18 -; CHECK-NEXT: and x10, x11, #0x1 +; CHECK-NEXT: add x9, x17, x9 ; CHECK-NEXT: mul x0, x0, x4 +; CHECK-NEXT: add x9, x9, x18 ; CHECK-NEXT: add x9, x9, x10 -; CHECK-NEXT: add x11, x13, x15 -; CHECK-NEXT: add x10, x12, x11 -; CHECK-NEXT: add x3, x10, x9 +; CHECK-NEXT: add x3, x12, x9 ; CHECK-NEXT: ret %mul = mul i224 %x, %y ret i224 %mul @@ -182,50 +170,40 @@ define i256 @mul_i256(i256 %x, i256 %y) { ; CHECK-NEXT: mul x8, x1, x4 ; CHECK-NEXT: mul x9, x0, x5 ; CHECK-NEXT: umulh x10, x0, x4 -; CHECK-NEXT: mul x14, x2, x5 -; CHECK-NEXT: adds x8, x8, x9 ; CHECK-NEXT: mul x11, x2, x4 -; CHECK-NEXT: cset w9, hs -; CHECK-NEXT: adds x8, x8, x10 -; CHECK-NEXT: and x9, x9, #0x1 +; CHECK-NEXT: adds x8, x8, x9 ; CHECK-NEXT: mul x12, x1, x5 -; CHECK-NEXT: cset w10, hs -; CHECK-NEXT: and x10, x10, #0x1 +; CHECK-NEXT: adcs x8, x8, x10 +; CHECK-NEXT: mul x14, x2, x5 +; CHECK-NEXT: adc x10, xzr, xzr ; CHECK-NEXT: mul x13, x0, x6 -; CHECK-NEXT: add x9, x9, x10 -; CHECK-NEXT: umulh x15, x1, x4 ; CHECK-NEXT: adds x11, x11, x12 +; CHECK-NEXT: umulh x15, x1, x4 ; CHECK-NEXT: madd x14, x3, x4, x14 ; CHECK-NEXT: umulh x16, x0, x5 ; CHECK-NEXT: madd x12, x1, x6, x14 ; CHECK-NEXT: cset w14, hs -; CHECK-NEXT: adds x11, x11, x13 -; CHECK-NEXT: cset w13, hs -; CHECK-NEXT: adds x11, x11, x15 -; CHECK-NEXT: and x14, x14, #0x1 +; CHECK-NEXT: adcs x11, x11, x13 +; CHECK-NEXT: adc x13, xzr, xzr +; CHECK-NEXT: cmp w14, #1 ; CHECK-NEXT: umulh x17, x2, x4 -; CHECK-NEXT: and x13, x13, #0x1 -; CHECK-NEXT: add x13, x14, x13 -; CHECK-NEXT: umulh x18, x1, x5 -; CHECK-NEXT: cset w1, hs -; CHECK-NEXT: adds x11, x11, x16 -; CHECK-NEXT: and x14, x1, #0x1 +; CHECK-NEXT: adcs x11, x11, x15 +; CHECK-NEXT: adc x13, x13, xzr +; CHECK-NEXT: cmp w14, #1 +; CHECK-NEXT: umulh x9, x1, x5 +; CHECK-NEXT: adcs x11, x11, x16 ; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: umulh x15, x0, x6 -; CHECK-NEXT: add x10, x13, x14 -; CHECK-NEXT: cset w13, hs -; CHECK-NEXT: adds x2, x11, x9 -; CHECK-NEXT: and x9, x13, #0x1 +; CHECK-NEXT: adc x13, x13, xzr +; CHECK-NEXT: cmp w14, #1 +; CHECK-NEXT: umulh x18, x0, x6 +; CHECK-NEXT: adcs x2, x11, x10 +; CHECK-NEXT: adc x10, x13, xzr ; CHECK-NEXT: madd x12, x0, x7, x12 -; CHECK-NEXT: cset w11, hs -; CHECK-NEXT: add x9, x10, x9 -; CHECK-NEXT: add x13, x17, x18 -; CHECK-NEXT: and x10, x11, #0x1 +; CHECK-NEXT: add x9, x17, x9 ; CHECK-NEXT: mul x0, x0, x4 +; CHECK-NEXT: add x9, x9, x18 ; CHECK-NEXT: add x9, x9, x10 -; CHECK-NEXT: add x11, x13, x15 -; CHECK-NEXT: add x10, x12, x11 -; CHECK-NEXT: add x3, x10, x9 +; CHECK-NEXT: add x3, x12, x9 ; CHECK-NEXT: ret %mul = mul i256 %x, %y ret i256 %mul diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir index 2bf8649..a79f1db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -619,25 +619,24 @@ body: | ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] - ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL5]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[UMULH1]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH2]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UADDE2]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDE]](s32), [[ADD4]](s32) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; ; GFX89-LABEL: name: test_mul_s96 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir index f9ec3bc..81e13b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir @@ -49,6 +49,7 @@ body: | ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX8-LABEL: name: test_sdiv_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -87,6 +88,7 @@ body: | ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX9-LABEL: name: test_sdiv_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -125,6 +127,7 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX10-LABEL: name: test_sdiv_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -244,6 +247,7 @@ body: | ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_sdiv_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -313,6 +317,7 @@ body: | ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_sdiv_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -382,6 +387,7 @@ body: | ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_sdiv_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -506,6 +512,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -515,89 +522,73 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO14]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32) ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE40]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]] ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -610,9 +601,9 @@ body: | ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV22]] + ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV23]], [[UADDO21]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] @@ -620,9 +611,9 @@ body: | ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV24]] + ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV25]], [[UADDO23]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -635,6 +626,7 @@ body: | ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX8-LABEL: name: test_sdiv_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -683,89 +675,74 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32) ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -782,9 +759,9 @@ body: | ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV34]] + ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV35]], [[UADDO21]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] @@ -792,9 +769,9 @@ body: | ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV36]] + ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV37]], [[UADDO23]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -807,6 +784,7 @@ body: | ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX9-LABEL: name: test_sdiv_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -855,89 +833,74 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32) ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -954,9 +917,9 @@ body: | ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV34]] + ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV35]], [[UADDO21]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] @@ -964,9 +927,9 @@ body: | ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV36]] + ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV37]], [[UADDO23]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -979,6 +942,7 @@ body: | ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX10-LABEL: name: test_sdiv_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -1027,93 +991,78 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV16]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32) ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE40]], [[C5]] ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]] ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -1126,9 +1075,9 @@ body: | ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV28]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV29]], [[UADDO39]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV28]] + ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV29]], [[UADDO21]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]] @@ -1136,9 +1085,9 @@ body: | ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV30]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV31]], [[UADDO41]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV30]] + ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV31]], [[UADDO23]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -1208,6 +1157,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -1217,89 +1167,73 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32) ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDE40]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDE40]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]] ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -1312,9 +1246,9 @@ body: | ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV27]], [[UADDO39]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV26]] + ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV27]], [[UADDO21]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] @@ -1322,9 +1256,9 @@ body: | ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV29]], [[UADDO41]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV28]] + ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV29]], [[UADDO23]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -1340,14 +1274,14 @@ body: | ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] - ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]] - ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX6-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO25]] + ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO24]](s32), [[UADDE48]](s32) ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX6-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]] - ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]] - ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]] + ; GFX6-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO27]] + ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO26]](s32), [[UADDE50]](s32) ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX6-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) @@ -1371,94 +1305,79 @@ body: | ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]] ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] - ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[UMULH15]] ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] - ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] - ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH16]], [[UADDO29]] + ; GFX6-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] + ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] - ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] - ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]] - ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] - ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]] - ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO56]] - ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]] - ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]] - ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO56]] - ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL24]] - ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]] - ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]] - ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] - ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]] - ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]] - ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL24]] - ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]] - ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] - ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]] - ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]] - ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]] - ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] - ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]] + ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH18]], [[UADDO31]] + ; GFX6-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE57]] + ; GFX6-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[UADDE54]], [[UADDO31]] + ; GFX6-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[C6]], [[UADDE61]] + ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE62]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE60]] + ; GFX6-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO33]] + ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO32]] + ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO32]] + ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE64]] + ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO32]] + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[UMULH20]] + ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[MUL24]] + ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO32]], [[ADD13]] + ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[MUL24]] + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH21]], [[UADDO35]] + ; GFX6-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[ADD13]] + ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[MUL24]] + ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[ADD13]] + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH23]], [[UADDO37]] + ; GFX6-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE71]] + ; GFX6-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE70]], [[UADDE68]], [[UADDO37]] + ; GFX6-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[C6]], [[UADDE75]] + ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[ADD13]] + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[UADDE76]] + ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UADDE74]] + ; GFX6-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[ADD14]], [[UADDO39]] ; GFX6-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) ; GFX6-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]] - ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE18]] - ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]] - ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] - ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]] - ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE18]] - ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]] - ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE18]] - ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] - ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX6-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]] - ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX6-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]] - ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE18]] - ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX6-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32) + ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO38]] + ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE78]] + ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO38]] + ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO40]], [[UMULH25]], [[UADDO41]] + ; GFX6-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE78]] + ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO38]] + ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE78]] + ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDO42]], [[UMULH27]], [[UADDO43]] + ; GFX6-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE85]] + ; GFX6-NEXT: [[UADDE88:%[0-9]+]]:_(s32), [[UADDE89:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UADDE82]], [[UADDO43]] + ; GFX6-NEXT: [[UADDE90:%[0-9]+]]:_(s32), [[UADDE91:%[0-9]+]]:_(s1) = G_UADDE [[UADDE86]], [[C6]], [[UADDE89]] + ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE78]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[UADDE90]] + ; GFX6-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE88]](s32), [[ADD15]](s32) ; GFX6-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]] - ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]] - ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD33]] - ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]] - ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDE88]] + ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDE88]] + ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD15]] + ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDE88]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH29]] ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]] - ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD35]], [[USUBO11]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD35]] + ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD17]], [[USUBO11]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD17]] ; GFX6-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV55]] ; GFX6-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) @@ -1470,9 +1389,9 @@ body: | ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] ; GFX6-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]] - ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV57]], [[UADDO81]] - ; GFX6-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) + ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDE88]], [[UV56]] + ; GFX6-NEXT: [[UADDE92:%[0-9]+]]:_(s32), [[UADDE93:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV57]], [[UADDO45]] + ; GFX6-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE92]](s32) ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]] ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] @@ -1480,9 +1399,9 @@ body: | ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]] ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX6-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]] - ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV59]], [[UADDO83]] - ; GFX6-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) + ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UV58]] + ; GFX6-NEXT: [[UADDE94:%[0-9]+]]:_(s32), [[UADDE95:%[0-9]+]]:_(s1) = G_UADDE [[UADDE92]], [[UV59]], [[UADDO47]] + ; GFX6-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO46]](s32), [[UADDE94]](s32) ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] @@ -1496,6 +1415,7 @@ body: | ; GFX6-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: test_sdiv_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -1546,89 +1466,74 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV22]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV24]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV22]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV24]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV22]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV24]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV24]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO14]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO14]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE30]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE30]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32) ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDE40]], [[C5]] ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] @@ -1645,9 +1550,9 @@ body: | ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV38]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV39]], [[UADDO39]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV38]] + ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV39]], [[UADDO21]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]] @@ -1655,9 +1560,9 @@ body: | ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV40]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV41]], [[UADDO41]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV40]] + ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV41]], [[UADDO23]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -1673,14 +1578,14 @@ body: | ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] - ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO43]] - ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] + ; GFX8-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO25]] + ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO24]](s32), [[UADDE48]](s32) ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]] - ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO45]] - ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]] + ; GFX8-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO27]] + ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO26]](s32), [[UADDE50]](s32) ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) @@ -1709,85 +1614,70 @@ body: | ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV60]] ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV62]] ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV60]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]] - ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH12]], [[UADDO29]] + ; GFX8-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV62]] ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV60]] ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV62]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]] - ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD12]] - ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX8-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH14]], [[UADDO31]] + ; GFX8-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE57]] + ; GFX8-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[UADDE54]], [[UADDO31]] + ; GFX8-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[C6]], [[UADDE61]] ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV62]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO57]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE62]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE60]] + ; GFX8-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO33]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO32]], [[C5]] ; GFX8-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV65]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE16]], [[ANYEXT4]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO56]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE64]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_26]] ; GFX8-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV64]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[UV66]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV64]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]] - ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV66]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV64]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV66]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]] - ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD16]] - ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV66]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD19]], [[UADDO69]] + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV64]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO32]], [[UV66]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV64]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH16]], [[UADDO35]] + ; GFX8-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV66]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV64]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV66]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX8-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH18]], [[UADDO37]] + ; GFX8-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE71]] + ; GFX8-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE70]], [[UADDE68]], [[UADDO37]] + ; GFX8-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[C6]], [[UADDE75]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV66]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE76]] + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UADDE74]] + ; GFX8-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[ADD4]], [[UADDO39]] ; GFX8-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) ; GFX8-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO68]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE18]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO68]] - ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]] - ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE18]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO68]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE18]] - ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX8-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]] - ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD20]] - ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE18]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX8-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD23]](s32) + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO38]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE78]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO38]] + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX8-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO40]], [[UMULH20]], [[UADDO41]] + ; GFX8-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE78]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO38]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE78]] + ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX8-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDO42]], [[UMULH22]], [[UADDO43]] + ; GFX8-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE85]] + ; GFX8-NEXT: [[UADDE88:%[0-9]+]]:_(s32), [[UADDE89:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UADDE82]], [[UADDO43]] + ; GFX8-NEXT: [[UADDE90:%[0-9]+]]:_(s32), [[UADDE91:%[0-9]+]]:_(s1) = G_UADDE [[UADDE86]], [[C6]], [[UADDE89]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE78]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE90]] + ; GFX8-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE88]](s32), [[ADD5]](s32) ; GFX8-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDO78]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDE88]], [[C5]] ; GFX8-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV75]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDO78]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDE88]], [[AMDGPU_MAD_U64_U32_32]] ; GFX8-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV68]], [[UV74]] ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV69]], [[UV76]], [[USUBO11]] @@ -1803,9 +1693,9 @@ body: | ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV79]], [[USUBO11]] ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] ; GFX8-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV80]] - ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV81]], [[UADDO81]] - ; GFX8-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) + ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDE88]], [[UV80]] + ; GFX8-NEXT: [[UADDE92:%[0-9]+]]:_(s32), [[UADDE93:%[0-9]+]]:_(s1) = G_UADDE [[ADD5]], [[UV81]], [[UADDO45]] + ; GFX8-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE92]](s32) ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV79]] ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV78]] @@ -1813,9 +1703,9 @@ body: | ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV79]] ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX8-NEXT: [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV82]] - ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV83]], [[UADDO83]] - ; GFX8-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) + ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UV82]] + ; GFX8-NEXT: [[UADDE94:%[0-9]+]]:_(s32), [[UADDE95:%[0-9]+]]:_(s1) = G_UADDE [[UADDE92]], [[UV83]], [[UADDO47]] + ; GFX8-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO46]](s32), [[UADDE94]](s32) ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] @@ -1829,6 +1719,7 @@ body: | ; GFX8-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_sdiv_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -1879,89 +1770,74 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV22]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV24]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV22]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV24]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV22]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV24]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV24]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO14]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO14]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE30]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE30]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32) ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDE40]], [[C5]] ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] @@ -1978,9 +1854,9 @@ body: | ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV38]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV39]], [[UADDO39]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV38]] + ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV39]], [[UADDO21]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]] @@ -1988,9 +1864,9 @@ body: | ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV40]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV41]], [[UADDO41]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV40]] + ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV41]], [[UADDO23]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -2006,14 +1882,14 @@ body: | ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] - ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO43]] - ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] + ; GFX9-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO25]] + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO24]](s32), [[UADDE48]](s32) ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]] - ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO45]] - ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]] + ; GFX9-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO27]] + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO26]](s32), [[UADDE50]](s32) ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) @@ -2042,85 +1918,70 @@ body: | ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV60]] ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV62]] ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV60]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]] - ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH12]], [[UADDO29]] + ; GFX9-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV62]] ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV60]] ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV62]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]] - ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD12]] - ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX9-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH14]], [[UADDO31]] + ; GFX9-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE57]] + ; GFX9-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[UADDE54]], [[UADDO31]] + ; GFX9-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[C6]], [[UADDE61]] ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV62]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO57]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE62]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE60]] + ; GFX9-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO33]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO32]], [[C5]] ; GFX9-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV65]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE16]], [[ANYEXT4]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO56]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE64]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_26]] ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV64]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[UV66]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV64]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]] - ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV66]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV64]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV66]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]] - ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD16]] - ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV66]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD19]], [[UADDO69]] + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV64]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO32]], [[UV66]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV64]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH16]], [[UADDO35]] + ; GFX9-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV66]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV64]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV66]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX9-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH18]], [[UADDO37]] + ; GFX9-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE71]] + ; GFX9-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE70]], [[UADDE68]], [[UADDO37]] + ; GFX9-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[C6]], [[UADDE75]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV66]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE76]] + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UADDE74]] + ; GFX9-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[ADD4]], [[UADDO39]] ; GFX9-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) ; GFX9-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO68]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE18]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO68]] - ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]] - ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE18]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO68]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE18]] - ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX9-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]] - ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD20]] - ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE18]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD23]](s32) + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO38]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE78]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO38]] + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX9-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO40]], [[UMULH20]], [[UADDO41]] + ; GFX9-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE78]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO38]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE78]] + ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX9-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDO42]], [[UMULH22]], [[UADDO43]] + ; GFX9-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE85]] + ; GFX9-NEXT: [[UADDE88:%[0-9]+]]:_(s32), [[UADDE89:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UADDE82]], [[UADDO43]] + ; GFX9-NEXT: [[UADDE90:%[0-9]+]]:_(s32), [[UADDE91:%[0-9]+]]:_(s1) = G_UADDE [[UADDE86]], [[C6]], [[UADDE89]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE78]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE90]] + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE88]](s32), [[ADD5]](s32) ; GFX9-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDO78]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDE88]], [[C5]] ; GFX9-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV75]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDO78]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDE88]], [[AMDGPU_MAD_U64_U32_32]] ; GFX9-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV68]], [[UV74]] ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV69]], [[UV76]], [[USUBO11]] @@ -2136,9 +1997,9 @@ body: | ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV79]], [[USUBO11]] ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] ; GFX9-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV80]] - ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV81]], [[UADDO81]] - ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) + ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDE88]], [[UV80]] + ; GFX9-NEXT: [[UADDE92:%[0-9]+]]:_(s32), [[UADDE93:%[0-9]+]]:_(s1) = G_UADDE [[ADD5]], [[UV81]], [[UADDO45]] + ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE92]](s32) ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV79]] ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV78]] @@ -2146,9 +2007,9 @@ body: | ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV79]] ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX9-NEXT: [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV82]] - ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV83]], [[UADDO83]] - ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) + ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UV82]] + ; GFX9-NEXT: [[UADDE94:%[0-9]+]]:_(s32), [[UADDE95:%[0-9]+]]:_(s1) = G_UADDE [[UADDE92]], [[UV83]], [[UADDO47]] + ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO46]](s32), [[UADDE94]](s32) ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] @@ -2162,6 +2023,7 @@ body: | ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX10-LABEL: name: test_sdiv_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} @@ -2212,93 +2074,78 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32) ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE40]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD8]] ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV31]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -2311,9 +2158,9 @@ body: | ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV32]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV33]], [[UADDO39]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV32]] + ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV33]], [[UADDO21]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV31]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV30]] @@ -2321,9 +2168,9 @@ body: | ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV31]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV34]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV35]], [[UADDO41]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV34]] + ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV35]], [[UADDO23]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -2339,14 +2186,14 @@ body: | ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]] - ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO43]] - ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]] + ; GFX10-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO25]] + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO24]](s32), [[UADDE48]](s32) ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV44]], [[UV46]] - ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV45]], [[UV47]], [[UADDO45]] - ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UV44]], [[UV46]] + ; GFX10-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UV45]], [[UV47]], [[UADDO27]] + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO26]](s32), [[UADDE50]](s32) ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) @@ -2369,95 +2216,80 @@ body: | ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C5]] ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[MUL15]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[MUL15]] ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[MUL16]] ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV54]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV54]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] - ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]] - ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] + ; GFX10-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH12]], [[UADDO29]] + ; GFX10-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV54]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] - ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]] - ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]] - ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] + ; GFX10-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH14]], [[UADDO31]] + ; GFX10-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE57]] + ; GFX10-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[UADDE54]], [[UADDO31]] + ; GFX10-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[C6]], [[UADDE61]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE62]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE60]] + ; GFX10-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO33]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO32]], [[C5]] ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[MUL20]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV56]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV56]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] - ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]] - ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV56]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] - ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]] - ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]] - ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] - ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE64]] + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO32]] + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV56]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO32]], [[ADD13]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV56]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] + ; GFX10-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH16]], [[UADDO35]] + ; GFX10-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[ADD13]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV56]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[ADD13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] + ; GFX10-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH18]], [[UADDO37]] + ; GFX10-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE71]] + ; GFX10-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE70]], [[UADDE68]], [[UADDO37]] + ; GFX10-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[C6]], [[UADDE75]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[ADD13]] + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE76]] + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UADDE74]] + ; GFX10-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[ADD14]], [[UADDO39]] ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDO68]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV60]], [[UADDE18]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDO68]] - ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]] - ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDE18]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDO68]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDE18]] - ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] - ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX10-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]] - ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX10-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]] - ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDE18]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] - ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32) + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDO38]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV60]], [[UADDE78]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDO38]] + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO40]], [[UMULH20]], [[UADDO41]] + ; GFX10-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDE78]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDO38]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDE78]] + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] + ; GFX10-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDO42]], [[UMULH22]], [[UADDO43]] + ; GFX10-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE85]] + ; GFX10-NEXT: [[UADDE88:%[0-9]+]]:_(s32), [[UADDE89:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UADDE82]], [[UADDO43]] + ; GFX10-NEXT: [[UADDE90:%[0-9]+]]:_(s32), [[UADDE91:%[0-9]+]]:_(s1) = G_UADDE [[UADDE86]], [[C6]], [[UADDE89]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDE78]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE90]] + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE88]](s32), [[ADD15]](s32) ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV62]](s32), [[UADDO78]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV62]](s32), [[UADDE88]], [[C5]] ; GFX10-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV62]], [[ADD33]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[MUL28]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV63]], [[UADDO78]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV62]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV63]], [[UADDE88]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL29]] ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV58]], [[UV64]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV59]], [[ADD35]], [[USUBO11]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV59]], [[ADD35]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV59]], [[ADD17]], [[USUBO11]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV59]], [[ADD17]] ; GFX10-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV67]] ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) @@ -2469,9 +2301,9 @@ body: | ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV67]], [[USUBO11]] ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] ; GFX10-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV68]] - ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV69]], [[UADDO81]] - ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDE88]], [[UV68]] + ; GFX10-NEXT: [[UADDE92:%[0-9]+]]:_(s32), [[UADDE93:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV69]], [[UADDO45]] + ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE92]](s32) ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV67]] ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV66]] @@ -2479,9 +2311,9 @@ body: | ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV67]] ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX10-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV70]] - ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV71]], [[UADDO83]] - ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UV70]] + ; GFX10-NEXT: [[UADDE94:%[0-9]+]]:_(s32), [[UADDE95:%[0-9]+]]:_(s1) = G_UADDE [[UADDE92]], [[UV71]], [[UADDO47]] + ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO46]](s32), [[UADDE94]](s32) ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] @@ -2547,6 +2379,7 @@ body: | ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX8-LABEL: name: test_sdiv_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2587,6 +2420,7 @@ body: | ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX9-LABEL: name: test_sdiv_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2627,6 +2461,7 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX10-LABEL: name: test_sdiv_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2763,6 +2598,7 @@ body: | ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: test_sdiv_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2844,6 +2680,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_sdiv_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2922,6 +2759,7 @@ body: | ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB7]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX10-LABEL: name: test_sdiv_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -3052,6 +2890,7 @@ body: | ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX8-LABEL: name: test_sdiv_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -3092,6 +2931,7 @@ body: | ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX9-LABEL: name: test_sdiv_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -3132,6 +2972,7 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX10-LABEL: name: test_sdiv_s7 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -3227,6 +3068,7 @@ body: | ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX8-LABEL: name: test_sdiv_s17 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -3267,6 +3109,7 @@ body: | ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX9-LABEL: name: test_sdiv_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -3307,6 +3150,7 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; ; GFX10-LABEL: name: test_sdiv_s17 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -3407,6 +3251,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -3416,89 +3261,73 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO14]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32) ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE40]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]] ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -3511,9 +3340,9 @@ body: | ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV22]] + ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV23]], [[UADDO21]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] @@ -3521,9 +3350,9 @@ body: | ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV24]] + ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV25]], [[UADDO23]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -3536,6 +3365,7 @@ body: | ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX8-LABEL: name: test_sdiv_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -3586,89 +3416,74 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32) ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -3685,9 +3500,9 @@ body: | ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV34]] + ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV35]], [[UADDO21]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] @@ -3695,9 +3510,9 @@ body: | ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV36]] + ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV37]], [[UADDO23]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -3710,6 +3525,7 @@ body: | ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX9-LABEL: name: test_sdiv_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -3760,89 +3576,74 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32) ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -3859,9 +3660,9 @@ body: | ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV34]] + ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV35]], [[UADDO21]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] @@ -3869,9 +3670,9 @@ body: | ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV36]] + ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV37]], [[UADDO23]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -3884,6 +3685,7 @@ body: | ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX10-LABEL: name: test_sdiv_s33 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -3934,93 +3736,78 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV16]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32) ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE40]], [[C5]] ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]] ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -4033,9 +3820,9 @@ body: | ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV28]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV29]], [[UADDO39]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV28]] + ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV29]], [[UADDO21]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]] @@ -4043,9 +3830,9 @@ body: | ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV30]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV31]], [[UADDO41]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV30]] + ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV31]], [[UADDO23]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir index 08bb589..bb2b47d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -45,6 +45,7 @@ body: | ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX8-LABEL: name: test_srem_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -79,6 +80,7 @@ body: | ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX9-LABEL: name: test_srem_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -113,6 +115,7 @@ body: | ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX10-LABEL: name: test_srem_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -221,6 +224,7 @@ body: | ; GFX6-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_srem_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -283,6 +287,7 @@ body: | ; GFX8-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_srem_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -345,6 +350,7 @@ body: | ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_srem_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -462,6 +468,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -471,88 +478,72 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO14]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]] ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE40]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]] ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] @@ -586,6 +577,7 @@ body: | ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX8-LABEL: name: test_srem_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -634,88 +626,73 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -753,6 +730,7 @@ body: | ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX9-LABEL: name: test_srem_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -801,88 +779,73 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -920,6 +883,7 @@ body: | ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX10-LABEL: name: test_srem_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -968,92 +932,77 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV16]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE40]], [[C5]] ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] @@ -1144,6 +1093,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -1153,88 +1103,72 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]] ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDE40]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDE40]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]] ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] @@ -1271,14 +1205,14 @@ body: | ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX6-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]] - ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]] + ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO21]] + ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]] - ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO23]] + ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) @@ -1302,93 +1236,78 @@ body: | ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] - ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[UMULH15]] ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] - ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]] - ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH16]], [[UADDO25]] + ; GFX6-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE49]] + ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] - ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]] - ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]] - ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] - ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]] - ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]] - ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] - ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]] - ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]] - ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL24]] - ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]] - ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]] - ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] - ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]] - ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]] - ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL24]] - ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]] - ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] - ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]] - ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]] - ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]] - ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] - ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]] + ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH18]], [[UADDO27]] + ; GFX6-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] + ; GFX6-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[UADDE50]], [[UADDO27]] + ; GFX6-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDE54]], [[C6]], [[UADDE57]] + ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE58]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE56]] + ; GFX6-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO29]] + ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO28]] + ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO28]] + ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE60]] + ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO28]] + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[UMULH20]] + ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[MUL24]] + ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO28]], [[ADD13]] + ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[MUL24]] + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH21]], [[UADDO31]] + ; GFX6-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE63]] + ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[ADD13]] + ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[MUL24]] + ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[ADD13]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH23]], [[UADDO33]] + ; GFX6-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX6-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE66]], [[UADDE64]], [[UADDO33]] + ; GFX6-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDE68]], [[C6]], [[UADDE71]] + ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[ADD13]] + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[UADDE72]] + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UADDE70]] + ; GFX6-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[ADD14]], [[UADDO35]] ; GFX6-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) ; GFX6-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]] - ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE14]] - ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]] - ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] - ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]] - ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE14]] - ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]] - ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE14]] - ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] - ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]] - ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]] - ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE14]] - ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO34]] + ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE74]] + ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO34]] + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH25]], [[UADDO37]] + ; GFX6-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE77]] + ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE74]] + ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO34]] + ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE74]] + ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO38]], [[UMULH27]], [[UADDO39]] + ; GFX6-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX6-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDE80]], [[UADDE78]], [[UADDO39]] + ; GFX6-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE82]], [[C6]], [[UADDE85]] + ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE74]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[UADDE86]] ; GFX6-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]] - ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]] - ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD33]] - ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]] - ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDE84]] + ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDE84]] + ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD15]] + ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDE84]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH29]] ; GFX6-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]] - ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD35]], [[USUBO13]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD35]] + ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD17]], [[USUBO13]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD17]] ; GFX6-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) ; GFX6-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]] @@ -1423,6 +1342,7 @@ body: | ; GFX6-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: test_srem_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -1473,88 +1393,73 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV22]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV24]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV22]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV24]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV22]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV24]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV24]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO14]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO14]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE30]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE30]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDE40]], [[C5]] ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] @@ -1595,14 +1500,14 @@ body: | ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO39]] - ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]] + ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO21]] + ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO41]] - ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] + ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO23]] + ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) @@ -1631,84 +1536,69 @@ body: | ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV56]] ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV58]] ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV56]] - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]] - ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH12]], [[UADDO25]] + ; GFX8-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE49]] ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV58]] ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV56]] ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV58]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]] - ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD12]] - ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX8-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH14]], [[UADDO27]] + ; GFX8-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] + ; GFX8-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[UADDE50]], [[UADDO27]] + ; GFX8-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDE54]], [[C6]], [[UADDE57]] ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV58]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO53]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE58]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE56]] + ; GFX8-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO29]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO28]], [[C5]] ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE12]], [[ANYEXT4]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO52]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE60]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO28]], [[AMDGPU_MAD_U64_U32_26]] ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV60]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[UV62]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV60]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]] - ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV62]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV60]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV62]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]] - ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD16]] - ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV62]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD19]], [[UADDO65]] + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV60]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO28]], [[UV62]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV60]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH16]], [[UADDO31]] + ; GFX8-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE63]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV62]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV60]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV62]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX8-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH18]], [[UADDO33]] + ; GFX8-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX8-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE66]], [[UADDE64]], [[UADDO33]] + ; GFX8-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDE68]], [[C6]], [[UADDE71]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV62]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE72]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UADDE70]] + ; GFX8-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[ADD4]], [[UADDO35]] ; GFX8-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) ; GFX8-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO64]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE14]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO64]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]] - ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE14]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO64]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE14]] - ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]] - ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD20]] - ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE14]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO34]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE74]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO34]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX8-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH20]], [[UADDO37]] + ; GFX8-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE77]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE74]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO34]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE74]] + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX8-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO38]], [[UMULH22]], [[UADDO39]] + ; GFX8-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX8-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDE80]], [[UADDE78]], [[UADDO39]] + ; GFX8-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE82]], [[C6]], [[UADDE85]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE74]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE86]] ; GFX8-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDO74]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDE84]], [[C5]] ; GFX8-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV71]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDO74]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDE84]], [[AMDGPU_MAD_U64_U32_32]] ; GFX8-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV64]], [[UV70]] ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV65]], [[UV72]], [[USUBO13]] @@ -1747,6 +1637,7 @@ body: | ; GFX8-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_srem_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -1797,88 +1688,73 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV22]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV24]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV22]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV24]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV22]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV24]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV24]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO14]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO14]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE30]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE30]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDE40]], [[C5]] ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] @@ -1919,14 +1795,14 @@ body: | ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO39]] - ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]] + ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO21]] + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO41]] - ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] + ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO23]] + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) @@ -1955,84 +1831,69 @@ body: | ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV56]] ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV58]] ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV56]] - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]] - ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH12]], [[UADDO25]] + ; GFX9-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE49]] ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV58]] ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV56]] ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV58]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]] - ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD12]] - ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX9-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH14]], [[UADDO27]] + ; GFX9-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] + ; GFX9-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[UADDE50]], [[UADDO27]] + ; GFX9-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDE54]], [[C6]], [[UADDE57]] ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV58]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO53]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE58]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE56]] + ; GFX9-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO29]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO28]], [[C5]] ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE12]], [[ANYEXT4]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO52]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE60]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO28]], [[AMDGPU_MAD_U64_U32_26]] ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV60]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[UV62]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV60]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]] - ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV62]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV60]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV62]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]] - ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD16]] - ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV62]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD19]], [[UADDO65]] + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV60]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO28]], [[UV62]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV60]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH16]], [[UADDO31]] + ; GFX9-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE63]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV62]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV60]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV62]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX9-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH18]], [[UADDO33]] + ; GFX9-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX9-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE66]], [[UADDE64]], [[UADDO33]] + ; GFX9-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDE68]], [[C6]], [[UADDE71]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV62]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE72]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UADDE70]] + ; GFX9-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[ADD4]], [[UADDO35]] ; GFX9-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO64]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE14]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO64]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]] - ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE14]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO64]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE14]] - ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]] - ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD20]] - ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE14]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO34]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE74]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO34]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX9-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH20]], [[UADDO37]] + ; GFX9-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE77]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE74]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO34]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE74]] + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX9-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO38]], [[UMULH22]], [[UADDO39]] + ; GFX9-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX9-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDE80]], [[UADDE78]], [[UADDO39]] + ; GFX9-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE82]], [[C6]], [[UADDE85]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE74]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE86]] ; GFX9-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDO74]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDE84]], [[C5]] ; GFX9-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV71]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDO74]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDE84]], [[AMDGPU_MAD_U64_U32_32]] ; GFX9-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV64]], [[UV70]] ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV65]], [[UV72]], [[USUBO13]] @@ -2071,6 +1932,7 @@ body: | ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX10-LABEL: name: test_srem_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} @@ -2121,92 +1983,77 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE40]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD8]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV31]] @@ -2243,14 +2090,14 @@ body: | ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV36]], [[UV38]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV37]], [[UV39]], [[UADDO39]] - ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UV36]], [[UV38]] + ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UV37]], [[UV39]], [[UADDO21]] + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32) ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO41]] - ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]] + ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO23]] + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32) ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) @@ -2273,94 +2120,79 @@ body: | ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[FPTOUI2]], [[C5]] ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[MUL15]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[MUL15]] ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[MUL16]] ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV50]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV50]] - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] - ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]] - ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] + ; GFX10-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH12]], [[UADDO25]] + ; GFX10-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE49]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV50]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] - ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]] - ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]] - ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] + ; GFX10-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH14]], [[UADDO27]] + ; GFX10-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]] + ; GFX10-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[UADDE50]], [[UADDO27]] + ; GFX10-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDE54]], [[C6]], [[UADDE57]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE58]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE56]] + ; GFX10-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO29]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO28]], [[C5]] ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[MUL20]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV52]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV52]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] - ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]] - ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV52]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] - ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]] - ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]] - ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE60]] + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO28]] + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV52]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO28]], [[ADD13]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV52]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] + ; GFX10-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH16]], [[UADDO31]] + ; GFX10-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE63]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[ADD13]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV52]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[ADD13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] + ; GFX10-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH18]], [[UADDO33]] + ; GFX10-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]] + ; GFX10-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE66]], [[UADDE64]], [[UADDO33]] + ; GFX10-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDE68]], [[C6]], [[UADDE71]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[ADD13]] + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE72]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UADDE70]] + ; GFX10-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[ADD14]], [[UADDO35]] ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDO64]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV56]], [[UADDE14]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDO64]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]] - ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDE14]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDO64]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDE14]] - ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] - ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]] - ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]] - ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDE14]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDO34]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV56]], [[UADDE74]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDO34]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH20]], [[UADDO37]] + ; GFX10-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE77]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDE74]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDO34]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDE74]] + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] + ; GFX10-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO38]], [[UMULH22]], [[UADDO39]] + ; GFX10-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]] + ; GFX10-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDE80]], [[UADDE78]], [[UADDO39]] + ; GFX10-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE82]], [[C6]], [[UADDE85]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDE74]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE86]] ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV58]](s32), [[UADDO74]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV58]](s32), [[UADDE84]], [[C5]] ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV58]], [[ADD33]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[MUL28]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV59]], [[UADDO74]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV58]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV59]], [[UADDE84]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL29]] ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV54]], [[UV60]] - ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV55]], [[ADD35]], [[USUBO13]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV55]], [[ADD35]] + ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV55]], [[ADD17]], [[USUBO13]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV55]], [[ADD17]] ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV63]] @@ -2445,6 +2277,7 @@ body: | ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX8-LABEL: name: test_srem_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2483,6 +2316,7 @@ body: | ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] ; GFX8-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_srem_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2521,6 +2355,7 @@ body: | ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX10-LABEL: name: test_srem_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2648,6 +2483,7 @@ body: | ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: test_srem_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2722,6 +2558,7 @@ body: | ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_srem_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2793,6 +2630,7 @@ body: | ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB9]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX10-LABEL: name: test_srem_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2912,6 +2750,7 @@ body: | ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX8-LABEL: name: test_srem_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2948,6 +2787,7 @@ body: | ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX9-LABEL: name: test_srem_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2984,6 +2824,7 @@ body: | ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX10-LABEL: name: test_srem_s7 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -3071,6 +2912,7 @@ body: | ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX8-LABEL: name: test_srem_s17 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -3107,6 +2949,7 @@ body: | ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX9-LABEL: name: test_srem_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -3143,6 +2986,7 @@ body: | ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; ; GFX10-LABEL: name: test_srem_s17 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -3239,6 +3083,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -3248,88 +3093,72 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO14]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO14]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE30]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE30]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]] ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE40]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDE40]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]] ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] @@ -3363,6 +3192,7 @@ body: | ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX8-LABEL: name: test_srem_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -3413,88 +3243,73 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -3532,6 +3347,7 @@ body: | ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX9-LABEL: name: test_srem_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -3582,88 +3398,73 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]] ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] @@ -3701,6 +3502,7 @@ body: | ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; ; GFX10-LABEL: name: test_srem_s33 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -3751,92 +3553,77 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV16]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE40]], [[C5]] ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir index bfba201..6d9a168 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir @@ -381,6 +381,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -390,89 +391,73 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO10]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32) ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDE36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD8]] ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -485,9 +470,9 @@ body: | ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV14]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV15]], [[UADDO17]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] @@ -495,9 +480,9 @@ body: | ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV16]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV17]], [[UADDO19]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -537,89 +522,74 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32) ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -636,9 +606,9 @@ body: | ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV26]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV27]], [[UADDO17]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] @@ -646,9 +616,9 @@ body: | ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV28]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV29]], [[UADDO19]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -688,89 +658,74 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32) ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -787,9 +742,9 @@ body: | ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV26]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV27]], [[UADDO17]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] @@ -797,9 +752,9 @@ body: | ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV28]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV29]], [[UADDO19]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -839,93 +794,78 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV8]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32) ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDE36]], [[C4]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -938,9 +878,9 @@ body: | ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV20]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV21]], [[UADDO35]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV20]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV21]], [[UADDO17]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]] @@ -948,9 +888,9 @@ body: | ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV22]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV23]], [[UADDO37]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV22]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV23]], [[UADDO19]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -998,6 +938,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -1007,89 +948,73 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32) ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDE36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDE36]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]] ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -1102,9 +1027,9 @@ body: | ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV19]], [[UADDO35]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV18]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV19]], [[UADDO17]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] @@ -1112,9 +1037,9 @@ body: | ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV21]], [[UADDO37]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV20]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV21]], [[UADDO19]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -1140,94 +1065,79 @@ body: | ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]] ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]] ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[FPTOUI2]] - ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[UMULH15]] ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] - ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]] - ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO20]], [[UMULH16]], [[UADDO21]] + ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] + ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] - ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]] - ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]] - ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] - ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]] - ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDO48]] - ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]] - ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]] - ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[UADDO48]] - ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[MUL24]] - ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]] - ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]] - ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] - ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]] - ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]] - ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[MUL24]] - ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]] - ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] - ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]] - ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]] - ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]] - ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] - ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]] + ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH18]], [[UADDO23]] + ; GFX6-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE49]] + ; GFX6-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDE48]], [[UADDE46]], [[UADDO23]] + ; GFX6-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDE50]], [[C5]], [[UADDE53]] + ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE54]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE52]] + ; GFX6-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO25]] + ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDO24]] + ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO24]] + ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE56]] + ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[UADDO24]] + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[UMULH20]] + ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[MUL24]] + ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO24]], [[ADD13]] + ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[MUL24]] + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH21]], [[UADDO27]] + ; GFX6-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[ADD13]] + ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[MUL24]] + ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[ADD13]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH23]], [[UADDO29]] + ; GFX6-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE63]] + ; GFX6-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE62]], [[UADDE60]], [[UADDO29]] + ; GFX6-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[C5]], [[UADDE67]] + ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[ADD13]] + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[UADDE68]] + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UADDE66]] + ; GFX6-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[ADD14]], [[UADDO31]] ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]] - ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE10]] - ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]] - ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] - ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]] - ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE10]] - ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]] - ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE10]] - ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] - ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]] - ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]] - ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE10]] - ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32) + ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO30]] + ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE70]] + ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO30]] + ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH25]], [[UADDO33]] + ; GFX6-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE70]] + ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO30]] + ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE70]] + ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH27]], [[UADDO35]] + ; GFX6-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE77]] + ; GFX6-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDE76]], [[UADDE74]], [[UADDO35]] + ; GFX6-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[UADDE78]], [[C5]], [[UADDE81]] + ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE70]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[UADDE82]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE80]](s32), [[ADD15]](s32) ; GFX6-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]] - ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]] - ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD33]] - ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]] - ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDE80]] + ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDE80]] + ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD15]] + ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDE80]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH29]] ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]] - ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD35]], [[USUBO9]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD35]] + ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD17]], [[USUBO9]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD17]] ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV35]] ; GFX6-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) @@ -1239,9 +1149,9 @@ body: | ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO9]] ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]] - ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV37]], [[UADDO73]] - ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) + ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDE80]], [[UV36]] + ; GFX6-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV37]], [[UADDO37]] + ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE84]](s32) ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]] ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]] @@ -1249,9 +1159,9 @@ body: | ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]] ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]] - ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV39]], [[UADDO75]] - ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) + ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV38]] + ; GFX6-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UV39]], [[UADDO39]] + ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE86]](s32) ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] @@ -1294,89 +1204,74 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV14]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV16]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV14]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV16]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV14]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV16]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV16]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO10]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO10]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE26]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE26]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32) ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE36]], [[C4]] ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] @@ -1393,9 +1288,9 @@ body: | ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV30]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV31]], [[UADDO35]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV30]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV31]], [[UADDO17]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]] @@ -1403,9 +1298,9 @@ body: | ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV32]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV33]], [[UADDO37]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV32]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV33]], [[UADDO19]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -1436,85 +1331,70 @@ body: | ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV40]] ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV42]] ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV40]] - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]] - ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO20]], [[UMULH12]], [[UADDO21]] + ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV42]] ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV40]] ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV42]] - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]] - ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD12]] - ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX8-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH14]], [[UADDO23]] + ; GFX8-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE49]] + ; GFX8-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDE48]], [[UADDE46]], [[UADDO23]] + ; GFX8-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDE50]], [[C5]], [[UADDE53]] ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV42]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO49]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE54]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE52]] + ; GFX8-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO25]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO24]], [[C4]] ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV45]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE8]], [[ANYEXT4]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO48]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE56]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO24]], [[AMDGPU_MAD_U64_U32_26]] ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV44]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[UV46]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV44]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]] - ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV46]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV44]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV46]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]] - ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD16]] - ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV46]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD19]], [[UADDO61]] + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV44]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO24]], [[UV46]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV44]] + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH16]], [[UADDO27]] + ; GFX8-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV46]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV44]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV46]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX8-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH18]], [[UADDO29]] + ; GFX8-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE63]] + ; GFX8-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE62]], [[UADDE60]], [[UADDO29]] + ; GFX8-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[C5]], [[UADDE67]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV46]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE68]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UADDE66]] + ; GFX8-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[ADD4]], [[UADDO31]] ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO60]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE10]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO60]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]] - ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE10]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO60]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE10]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]] - ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD20]] - ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE10]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD23]](s32) + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO30]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE70]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO30]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX8-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH20]], [[UADDO33]] + ; GFX8-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE70]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO30]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE70]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX8-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH22]], [[UADDO35]] + ; GFX8-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE77]] + ; GFX8-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDE76]], [[UADDE74]], [[UADDO35]] + ; GFX8-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[UADDE78]], [[C5]], [[UADDE81]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE70]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE82]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE80]](s32), [[ADD5]](s32) ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDO70]], [[C4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDE80]], [[C4]] ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV55]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDO70]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDE80]], [[AMDGPU_MAD_U64_U32_32]] ; GFX8-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[UV54]] ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[UV56]], [[USUBO9]] @@ -1530,9 +1410,9 @@ body: | ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV59]], [[USUBO9]] ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV60]] - ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV61]], [[UADDO73]] - ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDE80]], [[UV60]] + ; GFX8-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[ADD5]], [[UV61]], [[UADDO37]] + ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE84]](s32) ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV59]] ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV58]] @@ -1540,9 +1420,9 @@ body: | ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV59]] ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV62]] - ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV63]], [[UADDO75]] - ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV62]] + ; GFX8-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UV63]], [[UADDO39]] + ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE86]](s32) ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] @@ -1585,89 +1465,74 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV14]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV16]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV14]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV16]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV14]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV16]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV16]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO10]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO10]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE26]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE26]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32) ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE36]], [[C4]] ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] @@ -1684,9 +1549,9 @@ body: | ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV30]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV31]], [[UADDO35]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV30]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV31]], [[UADDO17]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]] @@ -1694,9 +1559,9 @@ body: | ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV32]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV33]], [[UADDO37]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV32]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV33]], [[UADDO19]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -1727,85 +1592,70 @@ body: | ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV40]] ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV42]] ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV40]] - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]] - ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO20]], [[UMULH12]], [[UADDO21]] + ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV42]] ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV40]] ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV42]] - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]] - ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD12]] - ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX9-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH14]], [[UADDO23]] + ; GFX9-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE49]] + ; GFX9-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDE48]], [[UADDE46]], [[UADDO23]] + ; GFX9-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDE50]], [[C5]], [[UADDE53]] ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV42]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO49]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE54]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE52]] + ; GFX9-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO25]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO24]], [[C4]] ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV45]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE8]], [[ANYEXT4]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO48]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE56]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO24]], [[AMDGPU_MAD_U64_U32_26]] ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV44]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[UV46]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV44]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]] - ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV46]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV44]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV46]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]] - ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD16]] - ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV46]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD19]], [[UADDO61]] + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV44]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO24]], [[UV46]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV44]] + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH16]], [[UADDO27]] + ; GFX9-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV46]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV44]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV46]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX9-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH18]], [[UADDO29]] + ; GFX9-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE63]] + ; GFX9-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE62]], [[UADDE60]], [[UADDO29]] + ; GFX9-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[C5]], [[UADDE67]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV46]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE68]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UADDE66]] + ; GFX9-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[ADD4]], [[UADDO31]] ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO60]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE10]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO60]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]] - ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE10]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO60]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE10]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]] - ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD20]] - ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE10]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD23]](s32) + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO30]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE70]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO30]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX9-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH20]], [[UADDO33]] + ; GFX9-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE70]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO30]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE70]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX9-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH22]], [[UADDO35]] + ; GFX9-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE77]] + ; GFX9-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDE76]], [[UADDE74]], [[UADDO35]] + ; GFX9-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[UADDE78]], [[C5]], [[UADDE81]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE70]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE82]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE80]](s32), [[ADD5]](s32) ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDO70]], [[C4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDE80]], [[C4]] ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV55]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDO70]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDE80]], [[AMDGPU_MAD_U64_U32_32]] ; GFX9-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[UV54]] ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[UV56]], [[USUBO9]] @@ -1821,9 +1671,9 @@ body: | ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV59]], [[USUBO9]] ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV60]] - ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV61]], [[UADDO73]] - ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDE80]], [[UV60]] + ; GFX9-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[ADD5]], [[UV61]], [[UADDO37]] + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE84]](s32) ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV59]] ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV58]] @@ -1831,9 +1681,9 @@ body: | ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV59]] ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV62]] - ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV63]], [[UADDO75]] - ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV62]] + ; GFX9-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UV63]], [[UADDO39]] + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE86]](s32) ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] @@ -1876,93 +1726,78 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32) ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]] ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE36]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV23]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -1975,9 +1810,9 @@ body: | ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV24]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV25]], [[UADDO35]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV24]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV25]], [[UADDO17]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV23]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV22]] @@ -1985,9 +1820,9 @@ body: | ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV23]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV26]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV27]], [[UADDO37]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV26]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV27]], [[UADDO19]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] @@ -2012,95 +1847,80 @@ body: | ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[FPTOUI2]], [[C4]] ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[MUL15]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[MUL15]] ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[MUL16]] ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV34]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV34]] - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] - ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]] - ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] + ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO20]], [[UMULH12]], [[UADDO21]] + ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV34]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] - ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]] - ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]] - ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] + ; GFX10-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH14]], [[UADDO23]] + ; GFX10-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE49]] + ; GFX10-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDE48]], [[UADDE46]], [[UADDO23]] + ; GFX10-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDE50]], [[C5]], [[UADDE53]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE54]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE52]] + ; GFX10-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO25]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO24]], [[C4]] ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[MUL20]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV36]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV36]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] - ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]] - ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV36]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] - ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]] - ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]] - ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE56]] + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO24]] + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV36]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO24]], [[ADD13]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV36]] + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] + ; GFX10-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH16]], [[UADDO27]] + ; GFX10-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[ADD13]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV36]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[ADD13]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] + ; GFX10-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH18]], [[UADDO29]] + ; GFX10-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE63]] + ; GFX10-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE62]], [[UADDE60]], [[UADDO29]] + ; GFX10-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[C5]], [[UADDE67]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[ADD13]] + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE68]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UADDE66]] + ; GFX10-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[ADD14]], [[UADDO31]] ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDO60]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV40]], [[UADDE10]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDO60]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]] - ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDE10]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDO60]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDE10]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] - ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]] - ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]] - ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDE10]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] - ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32) + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDO30]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV40]], [[UADDE70]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDO30]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH20]], [[UADDO33]] + ; GFX10-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDE70]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDO30]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDE70]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] + ; GFX10-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH22]], [[UADDO35]] + ; GFX10-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE77]] + ; GFX10-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDE76]], [[UADDE74]], [[UADDO35]] + ; GFX10-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[UADDE78]], [[C5]], [[UADDE81]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDE70]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE82]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE80]](s32), [[ADD15]](s32) ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV42]](s32), [[UADDO70]], [[C4]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV42]](s32), [[UADDE80]], [[C4]] ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV42]], [[ADD33]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[MUL28]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV43]], [[UADDO70]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV42]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV43]], [[UADDE80]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL29]] ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV44]] - ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[ADD35]], [[USUBO9]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV39]], [[ADD35]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[ADD17]], [[USUBO9]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV39]], [[ADD17]] ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV47]] ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) @@ -2112,9 +1932,9 @@ body: | ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV47]], [[USUBO9]] ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV48]] - ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV49]], [[UADDO73]] - ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDE80]], [[UV48]] + ; GFX10-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV49]], [[UADDO37]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE84]](s32) ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV47]] ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV46]] @@ -2122,9 +1942,9 @@ body: | ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV47]] ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV50]] - ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV51]], [[UADDO75]] - ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV50]] + ; GFX10-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UV51]], [[UADDO39]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE86]](s32) ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] @@ -2855,6 +2675,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -2864,89 +2685,73 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO10]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32) ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDE36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD8]] ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -2959,9 +2764,9 @@ body: | ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] - ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV14]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV15]], [[UADDO17]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] @@ -2969,9 +2774,9 @@ body: | ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] - ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV16]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV17]], [[UADDO19]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -3014,89 +2819,74 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]] ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32) ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C5]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -3113,9 +2903,9 @@ body: | ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV26]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV27]], [[UADDO17]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] @@ -3123,9 +2913,9 @@ body: | ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV28]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV29]], [[UADDO19]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -3168,89 +2958,74 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]] ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32) ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C5]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -3267,9 +3042,9 @@ body: | ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV26]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV27]], [[UADDO17]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] @@ -3277,9 +3052,9 @@ body: | ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV28]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV29]], [[UADDO19]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] @@ -3322,93 +3097,78 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]] ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV8]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32) ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDE36]], [[C5]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) @@ -3421,9 +3181,9 @@ body: | ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV20]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV21]], [[UADDO35]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV20]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV21]], [[UADDO17]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32) ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]] @@ -3431,9 +3191,9 @@ body: | ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV22]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV23]], [[UADDO37]] - ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV22]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV23]], [[UADDO19]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir index 3a919f0..54a2327 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir @@ -77,28 +77,24 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; ; GFX9-LABEL: name: test_umulh_s64 @@ -108,28 +104,24 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 @@ -152,52 +144,43 @@ body: | ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH4]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE13]] ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE17]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UADDE14]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[C]], [[UADDE21]] ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE22]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE20]](s32), [[ADD1]](s32) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; @@ -210,52 +193,43 @@ body: | ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH4]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE13]] ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE17]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UADDE14]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[C]], [[UADDE21]] ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE22]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE20]](s32), [[ADD1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir index 13c52d0..2917b06 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir @@ -115,42 +115,38 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C1]] ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV9]](s32) ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV7]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV5]](s32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV10]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) - ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) + ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT]](s64) ; ; GFX9-LABEL: name: test_umulo_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -159,42 +155,38 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C1]] ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV9]](s32) ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV7]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV5]](s32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV10]](s32) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) - ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64), %3:_(s1) = G_UMULO %0, %1 @@ -218,79 +210,70 @@ body: | ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C1]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV13]](s32) ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV11]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV9]](s32), [[UV10]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV14]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C1]] ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH4]], [[UADDO5]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE13]] ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV19]] ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV18]] ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV19]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE17]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UADDE14]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[C]], [[UADDE21]] ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV19]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE22]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE20]](s32), [[ADD1]](s32) ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C1]] ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV23]], [[ANYEXT1]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV21]](s32), [[UV22]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C2]] ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C2]] ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) @@ -304,79 +287,70 @@ body: | ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C1]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV13]](s32) ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV11]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV9]](s32), [[UV10]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV14]](s32) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C1]] ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH4]], [[UADDO5]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE13]] ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV19]] ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV18]] ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV19]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE17]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UADDE14]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[C]], [[UADDE21]] ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV19]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE22]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE20]](s32), [[ADD1]](s32) ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C1]] ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV23]], [[ANYEXT1]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV21]](s32), [[UV22]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C2]] ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C2]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir index 44f4412..b01e0ed 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir @@ -349,6 +349,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -358,88 +359,72 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO10]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]] ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDE36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD8]] ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] @@ -501,88 +486,73 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -648,88 +618,73 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -795,92 +750,77 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV8]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDE36]], [[C4]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] @@ -950,6 +890,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -959,88 +900,72 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]] ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDE36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDE36]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]] ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] @@ -1088,93 +1013,78 @@ body: | ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] - ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[UMULH15]] ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] - ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]] - ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH16]], [[UADDO17]] + ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE41]] + ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] - ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]] - ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]] - ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] - ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]] - ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]] - ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] - ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]] - ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]] - ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL24]] - ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]] - ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]] - ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] - ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]] - ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]] - ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL24]] - ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]] - ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] - ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]] - ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]] - ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]] - ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] - ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]] + ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH18]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] + ; GFX6-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UADDE42]], [[UADDO19]] + ; GFX6-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UADDE46]], [[C5]], [[UADDE49]] + ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE50]] + ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE48]] + ; GFX6-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO21]] + ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO20]] + ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO20]] + ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE52]] + ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO20]] + ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[UMULH20]] + ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[MUL24]] + ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO20]], [[ADD13]] + ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[MUL24]] + ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH21]], [[UADDO23]] + ; GFX6-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE55]] + ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[ADD13]] + ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[MUL24]] + ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[ADD13]] + ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH23]], [[UADDO25]] + ; GFX6-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX6-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[UADDE56]], [[UADDO25]] + ; GFX6-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[C5]], [[UADDE63]] + ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[ADD13]] + ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[UADDE64]] + ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UADDE62]] + ; GFX6-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[ADD14]], [[UADDO27]] ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]] - ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]] - ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] - ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]] - ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE6]] - ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]] - ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE6]] - ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] - ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]] - ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]] - ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE6]] - ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO26]] + ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE66]] + ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO26]] + ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH25]], [[UADDO29]] + ; GFX6-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE69]] + ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE66]] + ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO26]] + ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE66]] + ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH27]], [[UADDO31]] + ; GFX6-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX6-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[UADDE70]], [[UADDO31]] + ; GFX6-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE74]], [[C5]], [[UADDE77]] + ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE66]] + ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[UADDE78]] ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]] - ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]] - ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD33]] - ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]] - ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE76]] + ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE76]] + ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD15]] + ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE76]] + ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH29]] ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]] - ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD35]], [[USUBO11]] - ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD35]] + ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD17]], [[USUBO11]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD17]] ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]] @@ -1239,88 +1149,73 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV14]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV16]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV14]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV16]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV14]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV16]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV16]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO10]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO10]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE26]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE26]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE36]], [[C4]] ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] @@ -1377,84 +1272,69 @@ body: | ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV36]] ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV38]] ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV36]] - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]] - ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH12]], [[UADDO17]] + ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE41]] ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV38]] ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV36]] ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV38]] - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]] - ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD12]] - ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH14]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] + ; GFX8-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UADDE42]], [[UADDO19]] + ; GFX8-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UADDE46]], [[C5]], [[UADDE49]] ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV38]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO45]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE50]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE48]] + ; GFX8-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO21]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO20]], [[C4]] ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE4]], [[ANYEXT4]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO44]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE52]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO20]], [[AMDGPU_MAD_U64_U32_26]] ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV40]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[UV42]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV40]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] - ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV42]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV40]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV42]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] - ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD16]] - ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV42]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD19]], [[UADDO57]] + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV40]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO20]], [[UV42]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV40]] + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH16]], [[UADDO23]] + ; GFX8-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE55]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV42]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV40]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV42]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX8-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH18]], [[UADDO25]] + ; GFX8-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX8-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[UADDE56]], [[UADDO25]] + ; GFX8-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[C5]], [[UADDE63]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV42]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE64]] + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UADDE62]] + ; GFX8-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[ADD4]], [[UADDO27]] ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO56]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO56]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]] - ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO56]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]] - ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD20]] - ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE6]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO26]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE66]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX8-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH20]], [[UADDO29]] + ; GFX8-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE69]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE66]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE66]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX8-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH22]], [[UADDO31]] + ; GFX8-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX8-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[UADDE70]], [[UADDO31]] + ; GFX8-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE74]], [[C5]], [[UADDE77]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE66]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE78]] ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDO66]], [[C4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDE76]], [[C4]] ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV51]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDO66]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDE76]], [[AMDGPU_MAD_U64_U32_32]] ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV50]] ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV52]], [[USUBO11]] @@ -1523,88 +1403,73 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV14]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV16]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV14]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV16]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV14]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV16]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV16]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO10]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO10]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE26]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE26]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE36]], [[C4]] ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] @@ -1661,84 +1526,69 @@ body: | ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV36]] ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV38]] ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV36]] - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]] - ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH12]], [[UADDO17]] + ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE41]] ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV38]] ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV36]] ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV38]] - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] - ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]] - ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD12]] - ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] + ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH14]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] + ; GFX9-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UADDE42]], [[UADDO19]] + ; GFX9-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UADDE46]], [[C5]], [[UADDE49]] ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV38]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO45]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE50]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE48]] + ; GFX9-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO21]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO20]], [[C4]] ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE4]], [[ANYEXT4]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO44]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE52]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO20]], [[AMDGPU_MAD_U64_U32_26]] ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV40]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[UV42]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV40]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] - ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV42]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV40]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV42]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] - ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] - ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD16]] - ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV42]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD19]], [[UADDO57]] + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV40]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO20]], [[UV42]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV40]] + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH16]], [[UADDO23]] + ; GFX9-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE55]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV42]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV40]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV42]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] + ; GFX9-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH18]], [[UADDO25]] + ; GFX9-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX9-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[UADDE56]], [[UADDO25]] + ; GFX9-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[C5]], [[UADDE63]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV42]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE64]] + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UADDE62]] + ; GFX9-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[ADD4]], [[UADDO27]] ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO56]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO56]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] - ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]] - ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO56]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] - ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]] - ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD20]] - ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE6]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO26]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE66]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] + ; GFX9-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH20]], [[UADDO29]] + ; GFX9-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE69]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE66]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE66]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] + ; GFX9-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH22]], [[UADDO31]] + ; GFX9-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX9-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[UADDE70]], [[UADDO31]] + ; GFX9-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE74]], [[C5]], [[UADDE77]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE66]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE78]] ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDO66]], [[C4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDE76]], [[C4]] ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV51]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD23]], [[ANYEXT5]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDO66]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD5]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDE76]], [[AMDGPU_MAD_U64_U32_32]] ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV50]] ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV52]], [[USUBO11]] @@ -1807,92 +1657,77 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]] ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]] ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE36]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV23]] @@ -1939,94 +1774,79 @@ body: | ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C4]] ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[MUL15]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[MUL15]] ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[MUL16]] ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV30]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]] ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV30]] - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] - ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]] - ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] + ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH12]], [[UADDO17]] + ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE41]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]] ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV30]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] - ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]] - ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]] - ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] + ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH14]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]] + ; GFX10-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UADDE42]], [[UADDO19]] + ; GFX10-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UADDE46]], [[C5]], [[UADDE49]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE50]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE48]] + ; GFX10-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO21]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO20]], [[C4]] ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[MUL20]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV32]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV32]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] - ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] - ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV32]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] - ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] - ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]] - ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE52]] + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO20]] + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV32]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO20]], [[ADD13]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV32]] + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] + ; GFX10-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH16]], [[UADDO23]] + ; GFX10-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE55]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[ADD13]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV32]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[ADD13]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] + ; GFX10-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH18]], [[UADDO25]] + ; GFX10-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]] + ; GFX10-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[UADDE56]], [[UADDO25]] + ; GFX10-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[C5]], [[UADDE63]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[ADD13]] + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE64]] + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UADDE62]] + ; GFX10-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[ADD14]], [[UADDO27]] ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDO56]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV36]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDO56]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]] - ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDO56]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] - ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]] - ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]] - ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDE6]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDO26]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV36]], [[UADDE66]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH20]], [[UADDO29]] + ; GFX10-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE69]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDE66]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDE66]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] + ; GFX10-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH22]], [[UADDO31]] + ; GFX10-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]] + ; GFX10-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[UADDE70]], [[UADDO31]] + ; GFX10-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE74]], [[C5]], [[UADDE77]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDE66]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE78]] ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV38]](s32), [[UADDO66]], [[C4]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV38]](s32), [[UADDE76]], [[C4]] ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV38]], [[ADD33]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[MUL28]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV39]], [[UADDO66]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV38]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV39]], [[UADDE76]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL29]] ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV34]], [[UV40]] - ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[ADD35]], [[USUBO11]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV35]], [[ADD35]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[ADD17]], [[USUBO11]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV35]], [[ADD17]] ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV43]] @@ -2723,6 +2543,7 @@ body: | ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] @@ -2732,88 +2553,72 @@ body: | ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] - ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] - ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] - ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] - ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] - ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] - ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] - ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] - ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]] + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]] + ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]] + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]] + ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]] + ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]] + ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] - ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] - ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] - ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] - ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO10]] + ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]] + ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO10]] + ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE26]] + ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE26]] + ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]] ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDE36]] + ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD6]] + ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDE36]] + ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]] ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD8]], [[USUBO3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD8]] ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] @@ -2878,88 +2683,73 @@ body: | ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]] ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C5]] ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -3028,88 +2818,73 @@ body: | ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]] ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]] ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] - ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]] ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] - ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] - ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] + ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C5]] ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] - ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]] ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] @@ -3178,92 +2953,77 @@ body: | ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]] ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] - ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] - ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]] ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]] ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] - ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] - ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] - ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] - ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] - ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV8]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]] + ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]] + ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]] ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] - ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] - ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] - ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] - ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] - ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] + ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]] + ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] + ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]] + ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]] + ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]] ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C5]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDE36]], [[C5]] ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]] ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index 377fa24..cafd4c1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -26,133 +26,116 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v0 -; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v0, vcc -; CHECK-NEXT: v_xor_b32_e32 v2, v1, v0 -; CHECK-NEXT: v_xor_b32_e32 v1, v3, v0 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v2 -; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v1 -; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v2 -; CHECK-NEXT: v_subb_u32_e32 v11, vcc, 0, v1, vcc -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v6 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 -; CHECK-NEXT: v_mul_f32_e32 v6, 0x2f800000, v3 -; CHECK-NEXT: v_trunc_f32_e32 v8, v6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v8 -; CHECK-NEXT: v_cvt_u32_f32_e32 v9, v3 -; CHECK-NEXT: v_cvt_u32_f32_e32 v12, v8 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v9, 0 -; CHECK-NEXT: v_mov_b32_e32 v3, v7 -; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[3:4] -; CHECK-NEXT: v_mul_lo_u32 v3, v12, v6 -; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8] -; CHECK-NEXT: v_mul_hi_u32 v8, v9, v6 -; CHECK-NEXT: v_mul_hi_u32 v6, v12, v6 -; CHECK-NEXT: v_mul_lo_u32 v13, v9, v7 -; CHECK-NEXT: v_mul_lo_u32 v14, v12, v7 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v3, v0, vcc +; CHECK-NEXT: v_xor_b32_e32 v6, v1, v0 +; CHECK-NEXT: v_xor_b32_e32 v7, v2, v0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v7 +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v6 +; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v7, vcc +; CHECK-NEXT: v_mac_f32_e32 v1, 0x4f800000, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v3, v2 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v11, v3 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v8, 0 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v11, v[2:3] +; CHECK-NEXT: v_mul_hi_u32 v12, v8, v1 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v8, v[2:3] +; CHECK-NEXT: v_mul_lo_u32 v3, v11, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v11, v1 +; CHECK-NEXT: v_mul_lo_u32 v13, v8, v2 +; CHECK-NEXT: v_mul_lo_u32 v14, v11, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v13 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v12, vcc +; CHECK-NEXT: v_mul_hi_u32 v12, v8, v2 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v14, v1 +; CHECK-NEXT: v_mul_hi_u32 v2, v11, v2 +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v12, vcc +; CHECK-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v12, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v1 +; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v2, vcc +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v8, 0 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v11, v[2:3] +; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v5 +; CHECK-NEXT: v_mul_hi_u32 v12, v8, v1 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v8, v[2:3] +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v9 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v9, vcc +; CHECK-NEXT: v_xor_b32_e32 v5, v3, v9 +; CHECK-NEXT: v_mul_lo_u32 v3, v11, v1 +; CHECK-NEXT: v_mul_lo_u32 v10, v8, v2 +; CHECK-NEXT: v_mul_hi_u32 v1, v11, v1 +; CHECK-NEXT: v_xor_b32_e32 v4, v4, v9 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v10 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v12, vcc +; CHECK-NEXT: v_mul_lo_u32 v3, v11, v2 +; CHECK-NEXT: v_mul_hi_u32 v12, v8, v2 +; CHECK-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CHECK-NEXT: v_mul_hi_u32 v2, v11, v2 +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v12, vcc +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v10, vcc +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v11, v2, vcc +; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v2 +; CHECK-NEXT: v_mul_hi_u32 v10, v5, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v8 -; CHECK-NEXT: v_mul_hi_u32 v8, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v13, v3 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v14, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v13, v8 -; CHECK-NEXT: v_mul_hi_u32 v7, v12, v7 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v3 -; CHECK-NEXT: v_addc_u32_e32 v12, vcc, v12, v6, vcc -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v9, 0 -; CHECK-NEXT: v_mov_b32_e32 v3, v7 -; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[3:4] -; CHECK-NEXT: v_ashrrev_i32_e32 v10, 31, v5 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v10 -; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8] -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v10, vcc -; CHECK-NEXT: v_xor_b32_e32 v8, v3, v10 -; CHECK-NEXT: v_mul_lo_u32 v3, v12, v6 -; CHECK-NEXT: v_mul_lo_u32 v5, v9, v7 -; CHECK-NEXT: v_xor_b32_e32 v11, v4, v10 -; CHECK-NEXT: v_mul_hi_u32 v4, v9, v6 -; CHECK-NEXT: v_mul_hi_u32 v6, v12, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, v12, v7 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v5, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_hi_u32 v6, v12, v7 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v9, v3 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v12, v4, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, v11, v3 -; CHECK-NEXT: v_mul_lo_u32 v6, v8, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v8, v3 -; CHECK-NEXT: v_mul_hi_u32 v3, v11, v3 -; CHECK-NEXT: v_mul_hi_u32 v9, v11, v4 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v11, v4 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_hi_u32 v6, v8, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v3, v5 -; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v7, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v9, v5 -; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5] -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v8, v3 -; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v7, v[4:5] -; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v11, v4, vcc -; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v11, v4 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v1 -; CHECK-NEXT: v_subb_u32_e32 v4, vcc, v4, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v2 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v1 -; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v9, s[4:5] -; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v7 -; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; CHECK-NEXT: v_mul_lo_u32 v3, v4, v2 +; CHECK-NEXT: v_mul_hi_u32 v10, v5, v2 +; CHECK-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v10, vcc +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, v1, v8, vcc +; CHECK-NEXT: v_mul_hi_u32 v10, v4, v2 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v8, 0 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v3 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v10, v[2:3] +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v5, v1 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v8, v[2:3] +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v4, v2, vcc +; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v4, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v7 +; CHECK-NEXT: v_subb_u32_e32 v2, vcc, v2, v7, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v6 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v7 +; CHECK-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v3, v4, v5, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v8 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v10, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v2, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v8 -; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v9, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v2, v7 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v4 +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v5, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v3, v10, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v3, v9, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v1, v3 ; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 @@ -235,18 +218,13 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v8, v3, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, vcc ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v8, vcc +; CHECK-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v0 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc @@ -258,21 +236,16 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 ; CHECK-NEXT: v_mul_lo_u32 v5, v3, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CHECK-NEXT: v_mul_hi_u32 v5, v3, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v6, v3, v1 +; CHECK-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v6, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc @@ -280,38 +253,33 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_lo_u32 v3, s12, v1 ; CHECK-NEXT: v_mul_hi_u32 v4, s12, v0 ; CHECK-NEXT: v_mul_hi_u32 v0, s13, v0 -; CHECK-NEXT: v_mul_hi_u32 v5, s13, v1 +; CHECK-NEXT: v_mov_b32_e32 v5, s13 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s13, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v3, s12, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v0, v2 -; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v4, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, s13, v1 +; CHECK-NEXT: v_mul_hi_u32 v4, s12, v1 +; CHECK-NEXT: v_addc_u32_e64 v3, s[0:1], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v4, vcc +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v0, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v4, s13, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v3, 0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v2, v[1:2] -; CHECK-NEXT: v_mov_b32_e32 v5, s13 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s12, v0 -; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2] -; CHECK-NEXT: v_mov_b32_e32 v3, s11 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v3, v[1:2] +; CHECK-NEXT: v_mov_b32_e32 v4, s11 ; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v5, v1, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[0:1], s13, v1 ; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v2 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] ; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v0 ; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s10, v0 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] ; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v2 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s11, v1 @@ -321,11 +289,11 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s11, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v4 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc ; CHECK-NEXT: s_xor_b64 s[0:1], s[6:7], s[8:9] ; CHECK-NEXT: v_xor_b32_e32 v0, s0, v0 ; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0 @@ -404,105 +372,90 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_hi_u32 v11, v9, v11 ; GISEL-NEXT: v_mul_lo_u32 v13, v14, v12 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v12 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v13, v5 -; GISEL-NEXT: v_mul_hi_u32 v13, v14, v12 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v17, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v17, v13 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v17, vcc +; GISEL-NEXT: v_mul_lo_u32 v5, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v17, v14, v12 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11 ; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], v5, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v13, vcc +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v5 -; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v9, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc ; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v14, 0 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 ; GISEL-NEXT: v_mov_b32_e32 v5, v12 -; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v17, v[5:6] -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc +; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v9, v[5:6] +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 ; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v14, v[12:13] -; GISEL-NEXT: v_xor_b32_e32 v5, v0, v9 -; GISEL-NEXT: v_mul_lo_u32 v0, v17, v11 -; GISEL-NEXT: v_mul_lo_u32 v13, v14, v12 -; GISEL-NEXT: v_xor_b32_e32 v15, v1, v9 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc +; GISEL-NEXT: v_xor_b32_e32 v13, v0, v5 +; GISEL-NEXT: v_mul_lo_u32 v0, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v14, v12 +; GISEL-NEXT: v_xor_b32_e32 v16, v1, v5 ; GISEL-NEXT: v_mul_hi_u32 v1, v14, v11 -; GISEL-NEXT: v_mul_hi_u32 v11, v17, v11 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v15 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v1, v9, v11 +; GISEL-NEXT: v_mul_hi_u32 v15, v14, v12 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 -; GISEL-NEXT: v_mul_hi_u32 v13, v14, v12 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_mul_hi_u32 v12, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v15, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v15, v0 -; GISEL-NEXT: v_mul_lo_u32 v12, v5, v1 -; GISEL-NEXT: v_mul_hi_u32 v13, v5, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 -; GISEL-NEXT: v_mul_hi_u32 v14, v15, v1 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v13, v15, v1 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v12, v5, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v0, v11 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v13, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v11 -; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v10, v14, v[1:2] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v5, v0 -; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v4, v13, v[11:12] -; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v15, v11, vcc -; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v15, v11 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v9, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v16, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1 +; GISEL-NEXT: v_mul_hi_u32 v12, v13, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v16, v0 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v12, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v16, v1 +; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v0, v11, vcc +; GISEL-NEXT: v_mul_hi_u32 v11, v16, v1 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v14, 0 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v11, v9 +; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v10, v15, v[1:2] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v13, v0 +; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v4, v14, v[11:12] +; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v16, v11, vcc +; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], v16, v11 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v5, v4, vcc +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v12, v11, v12, s[4:5] ; GISEL-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc -; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v7 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v5 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v7, v5, vcc -; GISEL-NEXT: v_xor_b32_e32 v7, v1, v5 -; GISEL-NEXT: v_xor_b32_e32 v6, v6, v5 +; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v7 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v9 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v7, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v7, v1, v9 +; GISEL-NEXT: v_xor_b32_e32 v6, v6, v9 ; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v7 -; GISEL-NEXT: v_cvt_f32_u32_e32 v15, v6 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, 1, v13 -; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v14, vcc -; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v15 +; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v6 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, 1, v14 +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v15, vcc +; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v13 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10 ; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 ; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 @@ -515,130 +468,115 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cvt_u32_f32_e32 v18, v18 ; GISEL-NEXT: v_subb_u32_e32 v21, vcc, 0, v6, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v11, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v15, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v4, v13, v10, vcc ; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v20, v18, v[1:2] ; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v16 ; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v21, v19, v[10:11] -; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v17, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 ; GISEL-NEXT: v_cndmask_b32_e32 v4, v16, v1, vcc ; GISEL-NEXT: v_mul_lo_u32 v1, v18, v0 ; GISEL-NEXT: v_mul_lo_u32 v11, v19, v10 ; GISEL-NEXT: v_mul_hi_u32 v16, v19, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v15, v17, v15, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc ; GISEL-NEXT: v_mul_hi_u32 v0, v18, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v16, v18, v10 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 -; GISEL-NEXT: v_mul_hi_u32 v11, v19, v10 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v16, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v18, v10 +; GISEL-NEXT: v_mul_hi_u32 v16, v19, v10 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GISEL-NEXT: v_mul_hi_u32 v10, v18, v10 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v16, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v19, v0 -; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v18, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v20, v10, 0 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v0 +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v18, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v20, v16, 0 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v13, v4, vcc -; GISEL-NEXT: v_xor_b32_e32 v13, v9, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v20, v11, v[1:2] -; GISEL-NEXT: v_cndmask_b32_e32 v12, v14, v15, vcc -; GISEL-NEXT: v_ashrrev_i32_e32 v14, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v21, v10, v[8:9] -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v14 -; GISEL-NEXT: v_xor_b32_e32 v1, v4, v13 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v14, vcc -; GISEL-NEXT: v_xor_b32_e32 v9, v2, v14 -; GISEL-NEXT: v_mul_lo_u32 v2, v11, v0 -; GISEL-NEXT: v_mul_lo_u32 v4, v10, v8 -; GISEL-NEXT: v_xor_b32_e32 v15, v3, v14 -; GISEL-NEXT: v_mul_hi_u32 v3, v10, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v11, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v12, v14, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v20, v17, v[1:2] +; GISEL-NEXT: v_xor_b32_e32 v1, v5, v8 +; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v21, v16, v[10:11] +; GISEL-NEXT: v_cndmask_b32_e32 v13, v15, v13, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GISEL-NEXT: v_xor_b32_e32 v5, v2, v8 +; GISEL-NEXT: v_mul_lo_u32 v2, v17, v0 +; GISEL-NEXT: v_mul_lo_u32 v10, v16, v4 +; GISEL-NEXT: v_xor_b32_e32 v11, v3, v8 +; GISEL-NEXT: v_mul_hi_u32 v3, v16, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v17, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v17, v4 +; GISEL-NEXT: v_mul_hi_u32 v10, v16, v4 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GISEL-NEXT: v_mul_hi_u32 v4, v17, v4 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; GISEL-NEXT: v_mul_hi_u32 v4, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; GISEL-NEXT: v_mul_hi_u32 v4, v11, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v11, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v15, v0 -; GISEL-NEXT: v_mul_lo_u32 v4, v9, v2 -; GISEL-NEXT: v_mul_hi_u32 v10, v9, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 -; GISEL-NEXT: v_mul_hi_u32 v11, v15, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v17, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v11, v0 +; GISEL-NEXT: v_mul_lo_u32 v4, v5, v2 +; GISEL-NEXT: v_xor_b32_e32 v10, v12, v1 +; GISEL-NEXT: v_mul_hi_u32 v12, v5, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v15, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; GISEL-NEXT: v_mul_hi_u32 v4, v9, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v10, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v0 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v12, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v11, v2 +; GISEL-NEXT: v_mul_hi_u32 v12, v5, v2 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v0, v4, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v4, v11, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v12, 0 +; GISEL-NEXT: v_xor_b32_e32 v13, v13, v1 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v4, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v11, v[0:1] -; GISEL-NEXT: v_xor_b32_e32 v8, v12, v13 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v13 -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v10, v[3:4] -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v13, vcc -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 -; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v15, v3, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v15, v3 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v14, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v1 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v12, v[3:4] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v13, v1, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v11, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v11, v3 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v6 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v7 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v6 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, v8, v9, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v10 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v10, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v12 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v14, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v6 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v10, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v6, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc -; GISEL-NEXT: v_xor_b32_e32 v4, v14, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc +; GISEL-NEXT: v_xor_b32_e32 v4, v8, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v14, v3, vcc ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 @@ -662,131 +600,116 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v5 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v0 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v5, v0, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v1, v0 -; CGP-NEXT: v_xor_b32_e32 v1, v3, v0 -; CGP-NEXT: v_cvt_f32_u32_e32 v3, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, v1 -; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v2 -; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v1, vcc -; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 -; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 -; CGP-NEXT: v_trunc_f32_e32 v5, v4 -; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v12, v3 -; CGP-NEXT: v_cvt_u32_f32_e32 v15, v5 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v12, 0 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v15, v[4:5] -; CGP-NEXT: v_mul_hi_u32 v16, v12, v3 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v12, v[4:5] -; CGP-NEXT: v_mul_lo_u32 v5, v15, v3 -; CGP-NEXT: v_mul_hi_u32 v3, v15, v3 -; CGP-NEXT: v_mul_lo_u32 v17, v12, v4 -; CGP-NEXT: v_mul_lo_u32 v18, v15, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 -; CGP-NEXT: v_mul_hi_u32 v16, v12, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v17, v5 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v18, v3 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v16 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; CGP-NEXT: v_mul_hi_u32 v4, v15, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v3 -; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v4, vcc -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v12, 0 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v15, v[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v5, v0, vcc +; CGP-NEXT: v_xor_b32_e32 v4, v1, v0 +; CGP-NEXT: v_xor_b32_e32 v5, v2, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v5 +; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v4 +; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v5, vcc +; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v2 +; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CGP-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CGP-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 +; CGP-NEXT: v_trunc_f32_e32 v3, v2 +; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v12, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v15, v3 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v12, 0 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v15, v[2:3] +; CGP-NEXT: v_mul_hi_u32 v16, v12, v1 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v12, v[2:3] +; CGP-NEXT: v_mul_lo_u32 v3, v15, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v15, v1 +; CGP-NEXT: v_mul_lo_u32 v17, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v18, v15, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v17 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v16, vcc +; CGP-NEXT: v_mul_hi_u32 v16, v12, v2 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v18, v1 +; CGP-NEXT: v_mul_hi_u32 v2, v15, v2 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v16, vcc +; CGP-NEXT: v_addc_u32_e64 v16, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v16, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v1 +; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v2, vcc +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v12, 0 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v15, v[2:3] ; CGP-NEXT: v_ashrrev_i32_e32 v13, 31, v11 -; CGP-NEXT: v_mul_hi_u32 v16, v12, v3 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v12, v[4:5] -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v13 +; CGP-NEXT: v_mul_hi_u32 v16, v12, v1 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v12, v[2:3] +; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v13 ; CGP-NEXT: v_addc_u32_e32 v10, vcc, v11, v13, vcc -; CGP-NEXT: v_xor_b32_e32 v11, v5, v13 -; CGP-NEXT: v_mul_lo_u32 v5, v15, v3 -; CGP-NEXT: v_mul_lo_u32 v14, v12, v4 -; CGP-NEXT: v_mul_hi_u32 v3, v15, v3 +; CGP-NEXT: v_xor_b32_e32 v11, v3, v13 +; CGP-NEXT: v_mul_lo_u32 v3, v15, v1 +; CGP-NEXT: v_mul_lo_u32 v14, v12, v2 +; CGP-NEXT: v_mul_hi_u32 v1, v15, v1 ; CGP-NEXT: v_xor_b32_e32 v10, v10, v13 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v15, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 -; CGP-NEXT: v_mul_hi_u32 v14, v12, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v16, v3 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_mul_hi_u32 v4, v15, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3 -; CGP-NEXT: v_addc_u32_e32 v4, vcc, v15, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v5, v10, v3 -; CGP-NEXT: v_mul_lo_u32 v12, v11, v4 -; CGP-NEXT: v_mul_hi_u32 v14, v11, v3 -; CGP-NEXT: v_mul_hi_u32 v3, v10, v3 -; CGP-NEXT: v_mul_hi_u32 v15, v10, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v14, v10, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_mul_hi_u32 v12, v11, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v16, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v15, v2 +; CGP-NEXT: v_mul_hi_u32 v16, v12, v2 +; CGP-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CGP-NEXT: v_mul_hi_u32 v2, v15, v2 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v16, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v14, vcc +; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v15, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v10, v1 +; CGP-NEXT: v_mul_lo_u32 v12, v11, v2 +; CGP-NEXT: v_mul_hi_u32 v14, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v10, v1 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v3, v5 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v14, 0 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v5 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v12, v[4:5] -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v11, v3 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v14, v[4:5] -; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v10, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v10, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v1 -; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v1, vcc +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v14, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v10, v2 +; CGP-NEXT: v_mul_hi_u32 v14, v11, v2 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v14, vcc +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v1, v12, vcc +; CGP-NEXT: v_mul_hi_u32 v14, v10, v2 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v12, 0 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v3 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v14, v[2:3] +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v12, v[2:3] +; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v10, v2, vcc +; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v10, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v5 +; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v5, vcc ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v2 -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v1 -; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc -; CGP-NEXT: v_cndmask_b32_e64 v5, v10, v11, s[4:5] -; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v14 -; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v5 +; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v10, v11, s[4:5] +; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v12 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v14, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; CGP-NEXT: v_cndmask_b32_e32 v1, v15, v2, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v15, v1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v10 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v11, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v1, v14, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v4, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v1, v12, v1, vcc ; CGP-NEXT: v_xor_b32_e32 v3, v13, v0 -; CGP-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v14, v2, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v1, v3 ; CGP-NEXT: v_xor_b32_e32 v1, v2, v3 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 @@ -835,131 +758,116 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: .LBB2_7: ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v7 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v2 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, v7, v2, vcc -; CGP-NEXT: v_xor_b32_e32 v4, v3, v2 -; CGP-NEXT: v_xor_b32_e32 v3, v5, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, v4 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 -; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v4 -; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v6 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v10, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v13, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v10, 0 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v13, v[6:7] -; CGP-NEXT: v_mul_hi_u32 v14, v10, v5 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v10, v[6:7] -; CGP-NEXT: v_mul_lo_u32 v7, v13, v5 -; CGP-NEXT: v_mul_hi_u32 v5, v13, v5 -; CGP-NEXT: v_mul_lo_u32 v15, v10, v6 -; CGP-NEXT: v_mul_lo_u32 v16, v13, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 -; CGP-NEXT: v_mul_hi_u32 v14, v10, v6 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v15, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_mul_hi_u32 v6, v13, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v5 -; CGP-NEXT: v_addc_u32_e32 v13, vcc, v13, v6, vcc -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v10, 0 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v13, v[6:7] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v7, v2, vcc +; CGP-NEXT: v_xor_b32_e32 v6, v3, v2 +; CGP-NEXT: v_xor_b32_e32 v7, v4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v7 +; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v6 +; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v7, vcc +; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; CGP-NEXT: v_trunc_f32_e32 v5, v4 +; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v10, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v5 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v10, 0 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v13, v[4:5] +; CGP-NEXT: v_mul_hi_u32 v14, v10, v3 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v10, v[4:5] +; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 +; CGP-NEXT: v_mul_lo_u32 v15, v10, v4 +; CGP-NEXT: v_mul_lo_u32 v16, v13, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v14, vcc +; CGP-NEXT: v_mul_hi_u32 v14, v10, v4 +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v16, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v13, v4 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], v3, v14, vcc +; CGP-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v14, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v3 +; CGP-NEXT: v_addc_u32_e32 v13, vcc, v13, v4, vcc +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v10, 0 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v13, v[4:5] ; CGP-NEXT: v_ashrrev_i32_e32 v11, 31, v9 -; CGP-NEXT: v_mul_hi_u32 v14, v10, v5 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v10, v[6:7] -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v11 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v3 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v10, v[4:5] +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v11 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v11, vcc -; CGP-NEXT: v_xor_b32_e32 v9, v7, v11 -; CGP-NEXT: v_mul_lo_u32 v7, v13, v5 -; CGP-NEXT: v_mul_lo_u32 v12, v10, v6 -; CGP-NEXT: v_mul_hi_u32 v5, v13, v5 +; CGP-NEXT: v_xor_b32_e32 v9, v5, v11 +; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 +; CGP-NEXT: v_mul_lo_u32 v12, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 ; CGP-NEXT: v_xor_b32_e32 v8, v8, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v14, v13, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CGP-NEXT: v_mul_hi_u32 v12, v10, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_mul_hi_u32 v6, v13, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v13, v6, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v8, v5 -; CGP-NEXT: v_mul_lo_u32 v10, v9, v6 -; CGP-NEXT: v_mul_hi_u32 v12, v9, v5 -; CGP-NEXT: v_mul_hi_u32 v5, v8, v5 -; CGP-NEXT: v_mul_hi_u32 v13, v8, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v8, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CGP-NEXT: v_mul_hi_u32 v10, v9, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v14, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v13, v4 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v4 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v13, v4 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], v3, v14, vcc +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v12, vcc +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v3 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v13, v4, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v8, v3 +; CGP-NEXT: v_mul_lo_u32 v10, v9, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v9, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v8, v3 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v5, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v12, 0 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v7 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v10, v[6:7] -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v9, v5 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, v12, v[6:7] -; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v8, v6, vcc -; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v8, v6 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3 -; CGP-NEXT: v_subb_u32_e32 v6, vcc, v6, v3, vcc +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v12, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v8, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v9, v4 +; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], v3, v12, vcc +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v3, v10, vcc +; CGP-NEXT: v_mul_hi_u32 v12, v8, v4 +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v10, 0 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v5 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v12, v[4:5] +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v9, v3 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v10, v[4:5] +; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v8, v4, vcc +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v8, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v7 +; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v7, vcc ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v4 -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v6 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v3 -; CGP-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, v8, v9, s[4:5] -; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v12 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v3 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v7 +; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v5, v8, v9, s[4:5] +; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v10 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v12, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v7 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v3 -; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v4, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v8 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc +; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v6, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc ; CGP-NEXT: v_xor_b32_e32 v5, v11, v2 -; CGP-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v3, v5 ; CGP-NEXT: v_xor_b32_e32 v3, v4, v5 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 @@ -1069,18 +977,13 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3 ; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v10, v2 +; CHECK-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v4, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v2, vcc, v2, v4, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2 ; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc @@ -1095,23 +998,18 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 ; CHECK-NEXT: v_xor_b32_e32 v9, v1, v6 ; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v1, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v0, v7, v3 +; CHECK-NEXT: v_mul_hi_u32 v1, v7, v2 ; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v8, vcc +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc @@ -1121,24 +1019,19 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0 ; CHECK-NEXT: v_mov_b32_e32 v5, 0x12d8fb ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v9, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, v9, v1 -; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v7, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v2 -; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v3, v[1:2] +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v7, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v9, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, v4, v1 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v0, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v7, v9, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v3, 0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v7, v[1:2] ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0 ; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1 @@ -1149,8 +1042,8 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v4, s[4:5] -; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v7 -; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v3 +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v7, vcc ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -1161,8 +1054,8 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v5, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v6 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v6 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 @@ -1182,6 +1075,8 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: s_sub_u32 s8, 0, 0x12d8fb +; GISEL-NEXT: s_subb_u32 s9, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; GISEL-NEXT: v_trunc_f32_e32 v7, v5 @@ -1199,18 +1094,13 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v4 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0 @@ -1226,23 +1116,18 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_lo_u32 v12, v11, v9 ; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4 ; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v1, v5, v8 ; GISEL-NEXT: v_mul_hi_u32 v12, v11, v9 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc @@ -1252,28 +1137,21 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_mov_b32_e32 v5, 0x12d8fb ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_hi_u32 v9, v10, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8 -; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v11, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v13, v1 +; GISEL-NEXT: v_mul_hi_u32 v11, v10, v1 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v0, v9, vcc +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v1 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v8 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v9, v8 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5, v12, v[1:2] ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v0 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9] -; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb -; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v13, v8, vcc ; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v13, v8 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 @@ -1282,107 +1160,92 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v8, vcc ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v9, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v11 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0 -; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v12, vcc +; GISEL-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s8, v6, 0 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v11 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v12, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2] -; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9] -; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v14, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0 -; GISEL-NEXT: v_mul_lo_u32 v13, v6, v8 -; GISEL-NEXT: v_mul_hi_u32 v15, v6, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s8, v7, v[1:2] +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13 +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s9, v6, v[8:9] +; GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v16, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v14 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v15, vcc +; GISEL-NEXT: v_mul_lo_u32 v16, v7, v0 +; GISEL-NEXT: v_mul_lo_u32 v17, v6, v8 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_mul_hi_u32 v1, v6, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v9, v14, v9, vcc +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v16, v17 +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v14, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v1, v7, v8 ; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v6, v8 +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v1, v0 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v6, v0 -; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v14, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v8, v1 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v0 +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v7, v1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s8, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v13, v15, v13, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s8, v14, v[1:2] +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 ; GISEL-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v14, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc -; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9 -; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s9, v8, v[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v13, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v11, v2, v10 +; GISEL-NEXT: v_mul_lo_u32 v2, v14, v0 ; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6 -; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9 +; GISEL-NEXT: v_xor_b32_e32 v12, v3, v10 ; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v14, v6 ; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GISEL-NEXT: v_mul_hi_u32 v6, v14, v6 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v14, v2, vcc ; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0 ; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2 ; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4 +; GISEL-NEXT: v_xor_b32_e32 v8, v9, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v7, v0 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v11, v2 +; GISEL-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v0, v6, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v9, 0 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v6, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v8, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v9, v[6:7] ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 ; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 @@ -1393,7 +1256,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v9 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc @@ -1405,12 +1268,12 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_sdiv_v2i64_oddk_denom: @@ -1423,222 +1286,192 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v7, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v9, v7 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0 -; CGP-NEXT: v_mov_b32_e32 v7, v5 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8] -; CGP-NEXT: v_mul_hi_u32 v12, v9, v4 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11] -; CGP-NEXT: v_mul_lo_u32 v10, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v4 -; CGP-NEXT: v_mul_lo_u32 v4, v8, v13 -; CGP-NEXT: v_mul_lo_u32 v7, v9, v13 -; CGP-NEXT: v_mul_hi_u32 v14, v8, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v9, v13 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_trunc_f32_e32 v5, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v5 +; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v7, 0 +; CGP-NEXT: v_mov_b32_e32 v4, v10 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, v[4:5] +; CGP-NEXT: v_mul_lo_u32 v14, v8, v9 +; CGP-NEXT: v_mul_hi_u32 v15, v7, v9 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], -1, v7, v[4:5] +; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 +; CGP-NEXT: v_mul_lo_u32 v5, v7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, v8, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v9 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v11, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v13, v7 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v8, v4 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 -; CGP-NEXT: v_addc_u32_e32 v17, vcc, v9, v7, vcc -; CGP-NEXT: v_mov_b32_e32 v4, v14 -; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] -; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 -; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15] -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc -; CGP-NEXT: v_xor_b32_e32 v15, v0, v7 -; CGP-NEXT: v_mul_lo_u32 v0, v17, v13 -; CGP-NEXT: v_mul_lo_u32 v4, v16, v14 -; CGP-NEXT: v_xor_b32_e32 v18, v1, v7 -; CGP-NEXT: v_mul_hi_u32 v1, v16, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v17, v13 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v6, v5, 0 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, v8, v4, vcc +; CGP-NEXT: v_mov_b32_e32 v4, v12 +; CGP-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v6, v16, v[4:5] +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mad_u64_u32 v[12:13], s[4:5], -1, v5, v[12:13] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v13, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v0, v16, v11 +; CGP-NEXT: v_mul_lo_u32 v17, v5, v12 +; CGP-NEXT: v_xor_b32_e32 v18, v1, v4 +; CGP-NEXT: v_mul_hi_u32 v1, v5, v11 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v17 +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v16, v12 +; CGP-NEXT: v_mul_hi_u32 v1, v16, v11 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v12 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v1, v17, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; CGP-NEXT: v_mul_hi_u32 v4, v16, v14 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_mul_hi_u32 v13, v17, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v18, v0 -; CGP-NEXT: v_mul_lo_u32 v14, v15, v1 -; CGP-NEXT: v_mul_hi_u32 v16, v15, v0 +; CGP-NEXT: v_mul_hi_u32 v12, v16, v12 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v17, vcc +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v16, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v18, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v13, v1 +; CGP-NEXT: v_mul_hi_u32 v16, v13, v0 ; CGP-NEXT: v_mul_hi_u32 v0, v18, v0 -; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v18, v1 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_mul_hi_u32 v14, v15, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v0, v13 -; CGP-NEXT: v_mul_hi_u32 v17, v18, v1 -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v16, 0 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v13 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v17, v[1:2] -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v15, v0 -; CGP-NEXT: v_subb_u32_e64 v1, s[4:5], v18, v13, vcc -; CGP-NEXT: v_sub_i32_e64 v13, s[4:5], v18, v13 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, v11, v16, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v18, v1 +; CGP-NEXT: v_mul_hi_u32 v16, v13, v1 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v16, vcc +; CGP-NEXT: v_addc_u32_e32 v16, vcc, v0, v12, vcc +; CGP-NEXT: v_mul_hi_u32 v12, v18, v1 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v16, 0 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v12, v11 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v5, v17, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v13, v0 +; CGP-NEXT: v_subb_u32_e64 v1, s[4:5], v18, v11, vcc +; CGP-NEXT: v_sub_i32_e64 v11, s[4:5], v18, v11 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v13, vcc -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v16 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v11, vcc +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v16 ; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v17, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 -; CGP-NEXT: v_mov_b32_e32 v0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v14, -1, v14, s[4:5] -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1] +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 +; CGP-NEXT: v_mov_b32_e32 v0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v12, -1, v12, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13 -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1] -; CGP-NEXT: v_cndmask_b32_e32 v5, -1, v19, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v15 -; CGP-NEXT: v_mul_lo_u32 v19, v8, v0 -; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v18, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v5, v15, v1, vcc -; CGP-NEXT: v_cndmask_b32_e32 v13, v18, v13, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v19 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v9, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_mul_hi_u32 v10, v8, v0 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v1 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0 -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 -; CGP-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc -; CGP-NEXT: v_xor_b32_e32 v11, v5, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2] -; CGP-NEXT: v_cndmask_b32_e32 v10, v17, v13, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v10, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6] +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1] +; CGP-NEXT: v_cndmask_b32_e32 v10, -1, v19, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v13 +; CGP-NEXT: v_mul_lo_u32 v19, v7, v0 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v18, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v10, v13, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v11, v18, v11, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v14, v19 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v15, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v8, v0 +; CGP-NEXT: v_mul_hi_u32 v14, v7, v0 +; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v14, vcc +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v13, vcc +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v7, v1 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, 0 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; CGP-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, v8, v[1:2] +; CGP-NEXT: v_xor_b32_e32 v1, v10, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v9, v[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc -; CGP-NEXT: v_xor_b32_e32 v12, v2, v10 -; CGP-NEXT: v_mul_lo_u32 v2, v9, v0 -; CGP-NEXT: v_mul_lo_u32 v6, v8, v5 +; CGP-NEXT: v_xor_b32_e32 v7, v2, v10 +; CGP-NEXT: v_mul_lo_u32 v2, v8, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v9, v6 ; CGP-NEXT: v_xor_b32_e32 v13, v3, v10 -; CGP-NEXT: v_mul_hi_u32 v3, v8, v0 -; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v3, v9, v5 +; CGP-NEXT: v_mul_hi_u32 v3, v9, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v8, v6 +; CGP-NEXT: v_mul_hi_u32 v12, v9, v6 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CGP-NEXT: v_mul_hi_u32 v6, v8, v6 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CGP-NEXT: v_mul_hi_u32 v6, v8, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_xor_b32_e32 v8, v11, v4 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CGP-NEXT: v_mul_hi_u32 v5, v9, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v0 -; CGP-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc -; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 -; CGP-NEXT: v_mul_lo_u32 v6, v12, v2 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v7 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v7, vcc -; CGP-NEXT: v_mul_hi_u32 v7, v12, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v13, v2 -; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_mul_hi_u32 v6, v12, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v3, v5 -; CGP-NEXT: v_mul_hi_u32 v8, v13, v2 -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v5 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4] -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 -; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 -; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5] -; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v7 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v2 +; CGP-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v0, v6, vcc +; CGP-NEXT: v_mul_hi_u32 v11, v13, v2 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v3, vcc +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v6, 0 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v9 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v8, v[3:4] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cndmask_b32_e64 v4, -1, v7, s[4:5] +; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v6 ; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v6 +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v7 ; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v5, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v2, v10 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 @@ -1673,130 +1506,115 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: .LBB7_3: ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v6 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v5, v0 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v6, v0, vcc -; CHECK-NEXT: v_xor_b32_e32 v2, v1, v0 -; CHECK-NEXT: v_xor_b32_e32 v1, v5, v0 -; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v2 -; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v1 -; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v2 -; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v1, vcc -; CHECK-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CHECK-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CHECK-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; CHECK-NEXT: v_trunc_f32_e32 v7, v6 -; CHECK-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v5 -; CHECK-NEXT: v_cvt_u32_f32_e32 v11, v7 -; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[6:7] -; CHECK-NEXT: v_mul_hi_u32 v12, v8, v5 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] -; CHECK-NEXT: v_mul_lo_u32 v7, v11, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5 -; CHECK-NEXT: v_mul_lo_u32 v13, v8, v6 -; CHECK-NEXT: v_mul_lo_u32 v14, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v13 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CHECK-NEXT: v_mul_hi_u32 v12, v8, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v13, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v14, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v5 -; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v6, vcc -; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[6:7] -; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v9, vcc -; CHECK-NEXT: v_xor_b32_e32 v7, v3, v9 -; CHECK-NEXT: v_mul_lo_u32 v3, v11, v5 -; CHECK-NEXT: v_mul_lo_u32 v10, v8, v6 -; CHECK-NEXT: v_xor_b32_e32 v12, v4, v9 -; CHECK-NEXT: v_mul_hi_u32 v4, v8, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v10, v3 -; CHECK-NEXT: v_mul_hi_u32 v10, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v11, v4, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, v12, v3 -; CHECK-NEXT: v_mul_lo_u32 v6, v7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, v7, v3 -; CHECK-NEXT: v_mul_hi_u32 v3, v12, v3 -; CHECK-NEXT: v_mul_hi_u32 v10, v12, v4 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v8, v12, v4 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_hi_u32 v6, v7, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v3, v5 -; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v8, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v5 -; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5] -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v8, v[4:5] -; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v12, v4, vcc -; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v12, v4 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v1 -; CHECK-NEXT: v_subb_u32_e32 v4, vcc, v4, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v2 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v1 -; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v5, v7, v10, s[4:5] -; CHECK-NEXT: v_add_i32_e32 v7, vcc, 1, v8 -; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v6, v0, vcc +; CHECK-NEXT: v_xor_b32_e32 v7, v1, v0 +; CHECK-NEXT: v_xor_b32_e32 v8, v2, v0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v7 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v8 +; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v7 +; CHECK-NEXT: v_subb_u32_e32 v11, vcc, 0, v8, vcc +; CHECK-NEXT: v_mac_f32_e32 v1, 0x4f800000, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v5, v2 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v5 +; CHECK-NEXT: v_cvt_u32_f32_e32 v9, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v12, v5 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v10, v9, 0 +; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[2:3] +; CHECK-NEXT: v_mul_lo_u32 v2, v12, v1 +; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6] +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v12, v1 +; CHECK-NEXT: v_mul_lo_u32 v13, v9, v5 +; CHECK-NEXT: v_mul_lo_u32 v14, v12, v5 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v13 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v5 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v14, v1 +; CHECK-NEXT: v_mul_hi_u32 v5, v12, v5 +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v6, vcc +; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v6, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v1 +; CHECK-NEXT: v_addc_u32_e32 v12, vcc, v12, v2, vcc +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v10, v9, 0 +; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[2:3] +; CHECK-NEXT: v_ashrrev_i32_e32 v10, 31, v4 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v10 +; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6] +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v4, v10, vcc +; CHECK-NEXT: v_xor_b32_e32 v4, v2, v10 +; CHECK-NEXT: v_mul_lo_u32 v2, v12, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, v9, v5 +; CHECK-NEXT: v_xor_b32_e32 v11, v3, v10 +; CHECK-NEXT: v_mul_hi_u32 v3, v9, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v12, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v12, v5 +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v5 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_mul_hi_u32 v5, v12, v5 +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v6, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v12, v2, vcc +; CHECK-NEXT: v_mul_lo_u32 v3, v11, v1 +; CHECK-NEXT: v_mul_lo_u32 v5, v4, v2 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v11, v1 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v3, v11, v2 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v2 +; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v6, vcc +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v1, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v6, v11, v2 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v7, v5, 0 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v3 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, v[2:3] +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v5, v[2:3] +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v11, v2, vcc +; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v11, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v8 +; CHECK-NEXT: v_subb_u32_e32 v2, vcc, v2, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v7 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v8 +; CHECK-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v3, v4, v9, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v5 +; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v2, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v7 -; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v10, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v2, v8 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v4 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v3, v9, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v3, v10, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v1, v3 ; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3 @@ -1868,21 +1686,16 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_hi_u32 v11, v17, v11 ; GISEL-NEXT: v_mul_lo_u32 v13, v14, v12 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v18, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 -; GISEL-NEXT: v_mul_hi_u32 v13, v14, v12 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v18, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v18, vcc +; GISEL-NEXT: v_mul_lo_u32 v7, v17, v12 +; GISEL-NEXT: v_mul_hi_u32 v18, v14, v12 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 ; GISEL-NEXT: v_mul_hi_u32 v12, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], v7, v18, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v13, vcc +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v7 ; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v17, v11, vcc @@ -1898,23 +1711,18 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_lo_u32 v15, v14, v12 ; GISEL-NEXT: v_xor_b32_e32 v16, v1, v7 ; GISEL-NEXT: v_mul_hi_u32 v1, v14, v11 -; GISEL-NEXT: v_mul_hi_u32 v11, v17, v11 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v17, v12 +; GISEL-NEXT: v_mul_hi_u32 v1, v17, v11 ; GISEL-NEXT: v_mul_hi_u32 v15, v14, v12 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_mul_hi_u32 v12, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v15, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc @@ -1922,28 +1730,22 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_lo_u32 v12, v13, v1 ; GISEL-NEXT: v_mul_hi_u32 v14, v13, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v16, v0 -; GISEL-NEXT: v_mul_hi_u32 v15, v16, v1 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v16, v1 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v0, v11 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v14, vcc +; GISEL-NEXT: v_mul_lo_u32 v11, v16, v1 +; GISEL-NEXT: v_mul_hi_u32 v14, v13, v1 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v14, vcc +; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v0, v12, vcc +; GISEL-NEXT: v_mul_hi_u32 v12, v16, v1 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v14, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v11 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v12, v11 ; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v8, v15, v[1:2] ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v13, v0 ; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v5, v14, v[11:12] -; GISEL-NEXT: v_xor_b32_e32 v7, v7, v4 ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v16, v11, vcc ; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v16, v11 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v5 @@ -1993,116 +1795,102 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc ; GISEL-NEXT: v_mul_hi_u32 v0, v18, v0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v16, v18, v11 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1 -; GISEL-NEXT: v_mul_hi_u32 v12, v19, v11 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v16, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v18, v11 +; GISEL-NEXT: v_mul_hi_u32 v16, v19, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GISEL-NEXT: v_mul_hi_u32 v11, v18, v11 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v16, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v19, v0 -; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v18, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v20, v11, 0 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v0 +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v18, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v20, v16, 0 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v13, v14, v5, vcc -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v20, v12, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v13, v7 -; GISEL-NEXT: v_ashrrev_i32_e32 v13, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v21, v11, v[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v20, v17, v[1:2] +; GISEL-NEXT: v_xor_b32_e32 v1, v7, v4 +; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v21, v16, v[11:12] ; GISEL-NEXT: v_cndmask_b32_e32 v8, v15, v8, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v13 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v13, vcc -; GISEL-NEXT: v_xor_b32_e32 v5, v2, v13 -; GISEL-NEXT: v_mul_lo_u32 v2, v12, v0 -; GISEL-NEXT: v_mul_lo_u32 v14, v11, v4 -; GISEL-NEXT: v_xor_b32_e32 v15, v3, v13 -; GISEL-NEXT: v_mul_hi_u32 v3, v11, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v12, v4 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v14, v2 -; GISEL-NEXT: v_mul_hi_u32 v14, v11, v4 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v14 -; GISEL-NEXT: v_mul_hi_u32 v4, v12, v4 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_xor_b32_e32 v5, v2, v7 +; GISEL-NEXT: v_mul_lo_u32 v2, v17, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v16, v4 +; GISEL-NEXT: v_xor_b32_e32 v12, v3, v7 +; GISEL-NEXT: v_mul_hi_u32 v3, v16, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v17, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v17, v4 +; GISEL-NEXT: v_mul_hi_u32 v11, v16, v4 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GISEL-NEXT: v_mul_hi_u32 v4, v17, v4 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v12, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v15, v0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v17, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0 ; GISEL-NEXT: v_mul_lo_u32 v4, v5, v2 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 -; GISEL-NEXT: v_mul_hi_u32 v12, v15, v2 +; GISEL-NEXT: v_xor_b32_e32 v11, v13, v1 +; GISEL-NEXT: v_mul_hi_u32 v13, v5, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v15, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; GISEL-NEXT: v_mul_hi_u32 v4, v5, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v3 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v11, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v0 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v13, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v13, v5, v2 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v0, v4, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v4, v12, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v13, 0 +; GISEL-NEXT: v_xor_b32_e32 v8, v8, v1 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v4, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[0:1] -; GISEL-NEXT: v_xor_b32_e32 v8, v8, v7 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v7 -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v11, v[3:4] -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v7, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v14, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v11, v1 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v13, v[3:4] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v1, vcc ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 -; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v15, v3, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v15, v3 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v9 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v10 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v9 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v7, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v11 -; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v12, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v8, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v13 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v14, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v7, vcc +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v9, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc -; GISEL-NEXT: v_xor_b32_e32 v4, v13, v6 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v13, v2, vcc +; GISEL-NEXT: v_xor_b32_e32 v4, v7, v6 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v14, v3, vcc ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 @@ -2129,131 +1917,119 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v12 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v0 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, v12, v0, vcc -; CGP-NEXT: v_xor_b32_e32 v4, v1, v0 -; CGP-NEXT: v_xor_b32_e32 v1, v10, v0 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v12, v0, vcc +; CGP-NEXT: v_xor_b32_e32 v13, v1, v0 +; CGP-NEXT: v_xor_b32_e32 v4, v4, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v13 ; CGP-NEXT: v_cvt_f32_u32_e32 v10, v4 -; CGP-NEXT: v_cvt_f32_u32_e32 v11, v1 -; CGP-NEXT: v_sub_i32_e32 v14, vcc, 0, v4 -; CGP-NEXT: v_subb_u32_e32 v15, vcc, 0, v1, vcc -; CGP-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 -; CGP-NEXT: v_rcp_iflag_f32_e32 v10, v10 -; CGP-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 -; CGP-NEXT: v_mul_f32_e32 v11, 0x2f800000, v10 -; CGP-NEXT: v_trunc_f32_e32 v12, v11 -; CGP-NEXT: v_mac_f32_e32 v10, 0xcf800000, v12 -; CGP-NEXT: v_cvt_u32_f32_e32 v13, v10 -; CGP-NEXT: v_cvt_u32_f32_e32 v16, v12 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[11:12] -; CGP-NEXT: v_mul_hi_u32 v17, v13, v10 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] -; CGP-NEXT: v_mul_lo_u32 v12, v16, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 -; CGP-NEXT: v_mul_lo_u32 v18, v13, v11 -; CGP-NEXT: v_mul_lo_u32 v19, v16, v11 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18 -; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; CGP-NEXT: v_mul_hi_u32 v17, v13, v11 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v18, v12 +; CGP-NEXT: v_sub_i32_e32 v15, vcc, 0, v13 +; CGP-NEXT: v_subb_u32_e32 v16, vcc, 0, v4, vcc +; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v10 +; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CGP-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v1 +; CGP-NEXT: v_trunc_f32_e32 v12, v10 +; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v12 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v17, v12 +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v15, v14, 0 +; CGP-NEXT: v_mov_b32_e32 v1, v11 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v17, v[1:2] +; CGP-NEXT: v_mul_lo_u32 v1, v17, v10 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v16, v14, v[11:12] +; CGP-NEXT: v_mul_hi_u32 v12, v14, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v17, v10 +; CGP-NEXT: v_mul_lo_u32 v18, v14, v11 +; CGP-NEXT: v_mul_lo_u32 v19, v17, v11 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v18 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v12, vcc +; CGP-NEXT: v_mul_hi_u32 v12, v14, v11 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v19, v10 -; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v17 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 -; CGP-NEXT: v_mul_hi_u32 v11, v16, v11 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v10 -; CGP-NEXT: v_addc_u32_e32 v16, vcc, v16, v11, vcc -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[11:12] -; CGP-NEXT: v_ashrrev_i32_e32 v14, 31, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v14, vcc -; CGP-NEXT: v_xor_b32_e32 v12, v8, v14 -; CGP-NEXT: v_mul_lo_u32 v8, v16, v10 -; CGP-NEXT: v_mul_lo_u32 v15, v13, v11 -; CGP-NEXT: v_xor_b32_e32 v17, v9, v14 -; CGP-NEXT: v_mul_hi_u32 v9, v13, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v16, v11 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8 -; CGP-NEXT: v_mul_hi_u32 v15, v13, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v15 -; CGP-NEXT: v_mul_hi_u32 v11, v16, v11 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v13, v8 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v16, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v10, v17, v8 -; CGP-NEXT: v_mul_lo_u32 v11, v12, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v12, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v17, v8 -; CGP-NEXT: v_mul_hi_u32 v15, v17, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v17, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v11, v12, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v13, v8 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v8, v10 -; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v13, 0 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_mul_hi_u32 v11, v17, v11 +; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v10, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v10, v1, vcc +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v12, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v10 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v1 +; CGP-NEXT: v_addc_u32_e32 v17, vcc, v17, v10, vcc +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v15, v14, 0 +; CGP-NEXT: v_mov_b32_e32 v1, v11 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v17, v[1:2] +; CGP-NEXT: v_ashrrev_i32_e32 v15, 31, v9 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v8, v15 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v16, v14, v[11:12] +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v15, vcc +; CGP-NEXT: v_xor_b32_e32 v12, v1, v15 +; CGP-NEXT: v_mul_lo_u32 v1, v17, v10 +; CGP-NEXT: v_mul_lo_u32 v9, v14, v11 +; CGP-NEXT: v_xor_b32_e32 v16, v8, v15 +; CGP-NEXT: v_mul_hi_u32 v8, v14, v10 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v17, v11 +; CGP-NEXT: v_mul_hi_u32 v8, v17, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v14, v11 +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v8 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v10, vcc +; CGP-NEXT: v_mul_hi_u32 v10, v17, v11 +; CGP-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v14, v1 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v17, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v16, v1 +; CGP-NEXT: v_mul_lo_u32 v10, v12, v8 +; CGP-NEXT: v_mul_hi_u32 v11, v12, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v16, v1 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v16, v8 +; CGP-NEXT: v_mul_hi_u32 v11, v12, v8 +; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v11, vcc +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v11, vcc, v1, v10, vcc +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v10, v16, v8 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v11, 0 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v10, v1 +; CGP-NEXT: v_mov_b32_e32 v1, v9 +; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v14, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v12, v8 ; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v11, v[9:10] -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v12, v8 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v1, v13, v[9:10] -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v17, v9, vcc -; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v17, v9 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v1 -; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v16, v9, vcc +; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v16, v9 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v4 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v1 +; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v13 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v13 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v4 ; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc -; CGP-NEXT: v_cndmask_b32_e64 v10, v12, v15, s[4:5] -; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v13 -; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v11, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v1 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v12, s[4:5] +; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v11 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v14, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v4 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v1 -; CGP-NEXT: v_cndmask_b32_e32 v1, v16, v4, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v12 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v15, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v13 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v4 +; CGP-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v10 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v12, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v4, v15, v8, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc -; CGP-NEXT: v_xor_b32_e32 v8, v14, v0 -; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v9, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc +; CGP-NEXT: v_xor_b32_e32 v8, v15, v0 +; CGP-NEXT: v_cndmask_b32_e32 v4, v14, v4, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v1, v8 ; CGP-NEXT: v_xor_b32_e32 v1, v4, v8 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 @@ -2304,133 +2080,116 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: .LBB8_7: ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v10 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v2 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v10, v2, vcc -; CGP-NEXT: v_xor_b32_e32 v4, v3, v2 -; CGP-NEXT: v_xor_b32_e32 v3, v6, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v4 -; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3 -; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v4 -; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v3, vcc -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v8 -; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 -; CGP-NEXT: v_trunc_f32_e32 v10, v8 -; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v10 -; CGP-NEXT: v_cvt_u32_f32_e32 v11, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v14, v10 -; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0 -; CGP-NEXT: v_mov_b32_e32 v6, v9 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v14, v[6:7] -; CGP-NEXT: v_mul_lo_u32 v6, v14, v8 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10] -; CGP-NEXT: v_mul_hi_u32 v10, v11, v8 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v10, v2, vcc +; CGP-NEXT: v_xor_b32_e32 v6, v3, v2 +; CGP-NEXT: v_xor_b32_e32 v10, v4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v10 +; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v6 +; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v10, vcc +; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; CGP-NEXT: v_trunc_f32_e32 v8, v4 +; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v11, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v8 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v11, 0 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v14, v[4:5] +; CGP-NEXT: v_mul_lo_u32 v4, v14, v3 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v11, v[8:9] +; CGP-NEXT: v_mul_hi_u32 v9, v11, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v14, v3 +; CGP-NEXT: v_mul_lo_u32 v15, v11, v8 +; CGP-NEXT: v_mul_lo_u32 v16, v14, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v15 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v9, v11, v8 +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v16, v3 ; CGP-NEXT: v_mul_hi_u32 v8, v14, v8 -; CGP-NEXT: v_mul_lo_u32 v15, v11, v9 -; CGP-NEXT: v_mul_lo_u32 v16, v14, v9 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v11, v9 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v15, v6 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v16, v8 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v15, v10 -; CGP-NEXT: v_mul_hi_u32 v9, v14, v9 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v6 -; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v8, vcc -; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0 -; CGP-NEXT: v_mov_b32_e32 v6, v9 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v14, v[6:7] +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], v3, v9, vcc +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v9, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v3 +; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v4, vcc +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v11, 0 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v14, v[4:5] ; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10] -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v7, v12, vcc -; CGP-NEXT: v_xor_b32_e32 v10, v5, v12 -; CGP-NEXT: v_mul_lo_u32 v5, v14, v8 -; CGP-NEXT: v_mul_lo_u32 v7, v11, v9 -; CGP-NEXT: v_xor_b32_e32 v13, v6, v12 -; CGP-NEXT: v_mul_hi_u32 v6, v11, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v12 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v11, v[8:9] +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v7, v12, vcc +; CGP-NEXT: v_xor_b32_e32 v7, v4, v12 +; CGP-NEXT: v_mul_lo_u32 v4, v14, v3 +; CGP-NEXT: v_mul_lo_u32 v9, v11, v8 +; CGP-NEXT: v_xor_b32_e32 v13, v5, v12 +; CGP-NEXT: v_mul_hi_u32 v5, v11, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v14, v3 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v14, v8 +; CGP-NEXT: v_mul_hi_u32 v9, v11, v8 +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; CGP-NEXT: v_mul_hi_u32 v8, v14, v8 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v6, v14, v9 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_hi_u32 v7, v11, v9 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_mul_hi_u32 v8, v14, v9 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v14, v6, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v13, v5 -; CGP-NEXT: v_mul_lo_u32 v8, v10, v6 -; CGP-NEXT: v_mul_hi_u32 v9, v10, v5 -; CGP-NEXT: v_mul_hi_u32 v5, v13, v5 -; CGP-NEXT: v_mul_hi_u32 v11, v13, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v13, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_mul_hi_u32 v8, v10, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], v3, v9, vcc +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v11, v3 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v14, v4, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 +; CGP-NEXT: v_mul_lo_u32 v8, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v5, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v9, 0 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v7 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v8, v[6:7] -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, v9, v[6:7] -; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v13, v6, vcc -; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v13, v6 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3 -; CGP-NEXT: v_subb_u32_e32 v6, vcc, v6, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v4 -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v4 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v13, v4 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v4 +; CGP-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], v3, v9, vcc +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v3, v8, vcc +; CGP-NEXT: v_mul_hi_u32 v9, v13, v4 +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v5 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v9, v[4:5] +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v7, v3 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v8, v[4:5] +; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v13, v4, vcc +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v13, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v10 +; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v10, vcc +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v6 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v6 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v3 -; CGP-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, v10, v11, s[4:5] -; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v9 -; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v3 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v10 +; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[4:5] +; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v8 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v9, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v10 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v3 -; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v4, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v10 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v11, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v10 +; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v7 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v11, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v5, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v7, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v6, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc ; CGP-NEXT: v_xor_b32_e32 v5, v12, v2 -; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v3, v5 ; CGP-NEXT: v_xor_b32_e32 v3, v4, v5 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 @@ -2536,235 +2295,204 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v4 ; GISEL-NEXT: v_add_i32_e64 v3, s[4:5], 0, 0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, 0, v1 -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v1 -; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v3 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v1 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 ; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v1 ; GISEL-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc -; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v4 ; GISEL-NEXT: v_trunc_f32_e32 v9, v7 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v9 -; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v9 +; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v4 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v9 ; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0 -; GISEL-NEXT: v_mov_b32_e32 v5, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6] -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v7 +; GISEL-NEXT: v_mov_b32_e32 v4, v8 +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[4:5] +; GISEL-NEXT: v_mul_lo_u32 v4, v13, v7 ; GISEL-NEXT: v_mul_hi_u32 v14, v10, v7 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9] -; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7 ; GISEL-NEXT: v_mul_lo_u32 v9, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; GISEL-NEXT: v_mul_lo_u32 v15, v13, v8 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v14, vcc +; GISEL-NEXT: v_mul_hi_u32 v4, v13, v7 ; GISEL-NEXT: v_mul_hi_u32 v9, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4 ; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v7, vcc +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v5 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v4 ; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v7, vcc ; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0 -; GISEL-NEXT: v_mov_b32_e32 v5, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6] -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v7 +; GISEL-NEXT: v_mov_b32_e32 v4, v8 +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[4:5] +; GISEL-NEXT: v_mul_lo_u32 v4, v13, v7 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, 0, v0 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9] ; GISEL-NEXT: v_mul_hi_u32 v0, v10, v7 -; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7 ; GISEL-NEXT: v_mul_lo_u32 v9, v10, v8 -; GISEL-NEXT: v_and_b32_e32 v12, 0xffffff, v2 -; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v4, v0, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v13, v8 +; GISEL-NEXT: v_mul_hi_u32 v4, v13, v7 ; GISEL-NEXT: v_mul_hi_u32 v9, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 ; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v8, v4 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v13, v5, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v3, v0 -; GISEL-NEXT: v_mul_lo_u32 v8, v11, v5 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v13, v4, vcc +; GISEL-NEXT: v_mul_lo_u32 v4, v3, v0 +; GISEL-NEXT: v_mul_lo_u32 v8, v11, v7 ; GISEL-NEXT: v_mul_hi_u32 v9, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v3, v0 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v11, v5 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v7 -; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v1, v10, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_and_b32_e32 v10, 0xffffff, v2 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v9, vcc +; GISEL-NEXT: v_mul_lo_u32 v4, v3, v7 +; GISEL-NEXT: v_mul_hi_u32 v9, v11, v7 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v9, vcc +; GISEL-NEXT: v_mul_hi_u32 v9, v3, v7 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v1, v4, 0 +; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; GISEL-NEXT: v_mov_b32_e32 v5, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v1, v0, v[5:6] -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v10, v[8:9] -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v11, v7 -; GISEL-NEXT: v_subb_u32_e64 v7, s[4:5], v3, v5, vcc -; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v3, v5 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v1, v0, v[8:9] ; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], 0, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v2 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v3 -; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v5, v3, vcc -; GISEL-NEXT: v_mac_f32_e32 v11, 0x4f800000, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v9, s[4:5] -; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v6, v1 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v5, vcc -; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; GISEL-NEXT: v_trunc_f32_e32 v5, v5 -; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v4 -; GISEL-NEXT: v_sub_i32_e32 v14, vcc, 0, v2 -; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v5 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v11, 0 -; GISEL-NEXT: v_subb_u32_e32 v15, vcc, 0, v3, vcc -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v14, v13, v[5:6] -; GISEL-NEXT: v_add_i32_e32 v16, vcc, 1, v10 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v15, v11, v[5:6] +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v4, v[8:9] +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v11, v7 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v2 +; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v8, vcc +; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v3, v8 +; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v7 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v1 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v6, v3 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v7, s[4:5] +; GISEL-NEXT: v_trunc_f32_e32 v7, v6 +; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v5 +; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], 0, v2 +; GISEL-NEXT: v_subb_u32_e64 v14, s[4:5], 0, v3, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v12, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v7 +; GISEL-NEXT: v_subb_u32_e32 v8, vcc, v8, v3, vcc +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v9, v1 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v15, v[6:7] +; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v14, v12, v[6:7] +; GISEL-NEXT: v_mul_lo_u32 v7, v15, v5 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, 1, v4 +; GISEL-NEXT: v_mul_lo_u32 v18, v12, v6 +; GISEL-NEXT: v_mul_hi_u32 v20, v12, v5 ; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v0, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, -1, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, -1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v18 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc +; GISEL-NEXT: v_mul_lo_u32 v7, v15, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v15, v5 +; GISEL-NEXT: v_mul_hi_u32 v20, v12, v6 +; GISEL-NEXT: v_addc_u32_e64 v18, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6 +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], v5, v20, vcc +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v18, vcc +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v5 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v6, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v12, 0 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc -; GISEL-NEXT: v_mul_lo_u32 v6, v13, v4 -; GISEL-NEXT: v_mul_lo_u32 v8, v11, v5 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v9, v18, v1, vcc -; GISEL-NEXT: v_mul_hi_u32 v1, v11, v4 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v6, v13, v5 -; GISEL-NEXT: v_mul_hi_u32 v4, v13, v4 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; GISEL-NEXT: v_mul_hi_u32 v8, v11, v5 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v8, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v8, v19, v1, vcc +; GISEL-NEXT: v_mov_b32_e32 v1, v6 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v15, v[1:2] +; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v16 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v14, v12, v[6:7] +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v17, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v17, v9, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v15, v5 +; GISEL-NEXT: v_mul_lo_u32 v9, v12, v6 +; GISEL-NEXT: v_mul_hi_u32 v13, v12, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, 0, v10 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v13, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v15, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v15, v5 +; GISEL-NEXT: v_mul_hi_u32 v13, v12, v6 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6 +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], v5, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GISEL-NEXT: v_mul_hi_u32 v5, v13, v5 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v1 -; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v13, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v8, 0 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v16 -; GISEL-NEXT: v_mov_b32_e32 v1, v5 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v14, v11, v[1:2] -; GISEL-NEXT: v_addc_u32_e32 v18, vcc, 0, v17, vcc -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v15, v8, v[5:6] -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v16, v13, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v9, v17, v18, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_mul_lo_u32 v6, v11, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, v8, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], 0, v12 -; GISEL-NEXT: v_mul_hi_u32 v12, v8, v4 -; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v12, v11, v5 -; GISEL-NEXT: v_mul_hi_u32 v4, v11, v4 -; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 -; GISEL-NEXT: v_mul_hi_u32 v7, v8, v5 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v12, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v12, v7 -; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 -; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], v11, v5, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v6, v3, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, v10, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v8, v0, v9, vcc -; GISEL-NEXT: v_mul_hi_u32 v0, v10, v4 -; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v6, v3, v5 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; GISEL-NEXT: v_mul_hi_u32 v7, v10, v5 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v4, v0 -; GISEL-NEXT: v_mul_hi_u32 v9, v3, v5 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v7, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v0 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v12, v5 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v15, v6, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v3, v5 +; GISEL-NEXT: v_mul_lo_u32 v9, v10, v6 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GISEL-NEXT: v_mul_hi_u32 v4, v10, v5 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 +; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v8, v4, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v4, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v9, v10, v6 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[6:7], v4, v9, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v5, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], v4, v8, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v5, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v0, v7, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v6, v9 ; GISEL-NEXT: v_mov_b32_e32 v0, v5 ; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v9, v[0:1] ; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0, v1 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v7, v[5:6] -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v8, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v8, v[5:6] +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v7, vcc ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v10, v4 ; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v5, vcc ; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v3, v5 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 ; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v5, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v4, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v6, v3 ; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v6, v8, v10, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v6, v7, v10, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v8 ; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v5, v3 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc @@ -2772,13 +2500,13 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v5, v3 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v8 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v7 ; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v10, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v4, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc ; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 0, v2 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll index b666f45..73157c2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll @@ -147,23 +147,23 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_ashr_i32 s2, s9, 31 -; GFX8-NEXT: s_ashr_i32 s12, s11, 31 -; GFX8-NEXT: s_add_u32 s0, s8, s2 -; GFX8-NEXT: s_addc_u32 s1, s9, s2 -; GFX8-NEXT: s_add_u32 s8, s10, s12 -; GFX8-NEXT: s_mov_b32 s13, s12 -; GFX8-NEXT: s_addc_u32 s9, s11, s12 -; GFX8-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] +; GFX8-NEXT: s_ashr_i32 s12, s9, 31 +; GFX8-NEXT: s_ashr_i32 s14, s11, 31 +; GFX8-NEXT: s_add_u32 s0, s8, s12 +; GFX8-NEXT: s_addc_u32 s1, s9, s12 +; GFX8-NEXT: s_add_u32 s2, s10, s14 +; GFX8-NEXT: s_mov_b32 s15, s14 +; GFX8-NEXT: s_addc_u32 s3, s11, s14 +; GFX8-NEXT: s_xor_b64 s[8:9], s[2:3], s[14:15] ; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s9 ; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s8 -; GFX8-NEXT: s_mov_b32 s3, s2 -; GFX8-NEXT: s_xor_b64 s[10:11], s[0:1], s[2:3] +; GFX8-NEXT: s_mov_b32 s13, s12 +; GFX8-NEXT: s_xor_b64 s[10:11], s[0:1], s[12:13] ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: s_sub_u32 s14, 0, s8 -; GFX8-NEXT: s_subb_u32 s15, 0, s9 +; GFX8-NEXT: s_sub_u32 s18, 0, s8 +; GFX8-NEXT: s_subb_u32 s19, 0, s9 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX8-NEXT: v_trunc_f32_e32 v2, v1 @@ -171,10 +171,10 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -182,44 +182,34 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; GFX8-NEXT: v_add_u32_e64 v0, s[0:1], v7, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX8-NEXT: v_addc_u32_e64 v2, s[16:17], 0, 0, vcc +; GFX8-NEXT: v_addc_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 -; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc @@ -227,28 +217,23 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_mul_lo_u32 v3, s10, v1 ; GFX8-NEXT: v_mul_hi_u32 v4, s10, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, s11, v0 -; GFX8-NEXT: v_mul_hi_u32 v5, s11, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v4, s11, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_mul_hi_u32 v3, s10, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v0, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2] ; GFX8-NEXT: v_mov_b32_e32 v6, s11 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, s11, v1 +; GFX8-NEXT: v_mul_hi_u32 v4, s10, v1 +; GFX8-NEXT: v_addc_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v0, v3, vcc +; GFX8-NEXT: v_mul_hi_u32 v4, s11, v1 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v2 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2] ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s10, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] ; GFX8-NEXT: v_mov_b32_e32 v5, s9 ; GFX8-NEXT: v_subb_u32_e64 v2, s[0:1], v6, v1, vcc ; GFX8-NEXT: v_sub_u32_e64 v1, s[0:1], s11, v1 @@ -261,8 +246,8 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[0:1] ; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s8, v0 ; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v1, vcc -; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v4 -; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1] +; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v3 +; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v4, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7 @@ -280,20 +265,20 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v10, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v5, v0, v5, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v1, s[0:1] -; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[12:13] -; GFX8-NEXT: v_xor_b32_e32 v0, s0, v4 -; GFX8-NEXT: v_xor_b32_e32 v1, s1, v3 +; GFX8-NEXT: s_xor_b64 s[0:1], s[12:13], s[14:15] +; GFX8-NEXT: v_xor_b32_e32 v0, s0, v3 +; GFX8-NEXT: v_xor_b32_e32 v1, s1, v4 ; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 ; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc -; GFX8-NEXT: v_xor_b32_e32 v3, s2, v5 -; GFX8-NEXT: v_xor_b32_e32 v4, s2, v2 -; GFX8-NEXT: v_mov_b32_e32 v5, s2 -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s2, v3 +; GFX8-NEXT: v_xor_b32_e32 v3, s12, v5 +; GFX8-NEXT: v_xor_b32_e32 v4, s12, v2 +; GFX8-NEXT: v_mov_b32_e32 v5, s12 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s12, v3 ; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v4, v5, vcc ; GFX8-NEXT: v_mov_b32_e32 v4, s4 ; GFX8-NEXT: v_mov_b32_e32 v5, s5 @@ -307,23 +292,23 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_ashr_i32 s2, s9, 31 -; GFX9-NEXT: s_ashr_i32 s12, s11, 31 -; GFX9-NEXT: s_add_u32 s0, s8, s2 -; GFX9-NEXT: s_addc_u32 s1, s9, s2 -; GFX9-NEXT: s_add_u32 s8, s10, s12 -; GFX9-NEXT: s_mov_b32 s13, s12 -; GFX9-NEXT: s_addc_u32 s9, s11, s12 -; GFX9-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] +; GFX9-NEXT: s_ashr_i32 s12, s9, 31 +; GFX9-NEXT: s_ashr_i32 s14, s11, 31 +; GFX9-NEXT: s_add_u32 s0, s8, s12 +; GFX9-NEXT: s_addc_u32 s1, s9, s12 +; GFX9-NEXT: s_add_u32 s2, s10, s14 +; GFX9-NEXT: s_mov_b32 s15, s14 +; GFX9-NEXT: s_addc_u32 s3, s11, s14 +; GFX9-NEXT: s_xor_b64 s[8:9], s[2:3], s[14:15] ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s9 ; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s8 -; GFX9-NEXT: s_mov_b32 s3, s2 -; GFX9-NEXT: s_xor_b64 s[10:11], s[0:1], s[2:3] +; GFX9-NEXT: s_mov_b32 s13, s12 +; GFX9-NEXT: s_xor_b64 s[10:11], s[0:1], s[12:13] ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_sub_u32 s14, 0, s8 -; GFX9-NEXT: s_subb_u32 s15, 0, s9 +; GFX9-NEXT: s_sub_u32 s18, 0, s8 +; GFX9-NEXT: s_subb_u32 s19, 0, s9 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v2, v1 @@ -331,10 +316,10 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -342,92 +327,79 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc +; GFX9-NEXT: v_add_co_u32_e64 v0, s[0:1], v7, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[16:17], 0, 0, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 -; GFX9-NEXT: v_mov_b32_e32 v7, s9 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 -; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v5, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, s11, v0 ; GFX9-NEXT: v_mul_lo_u32 v3, s10, v1 ; GFX9-NEXT: v_mul_hi_u32 v4, s10, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s11, v0 -; GFX9-NEXT: v_mul_hi_u32 v6, s11, v1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, s11, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 -; GFX9-NEXT: v_mul_hi_u32 v3, s10, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v5, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2] ; GFX9-NEXT: v_mov_b32_e32 v6, s11 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s11, v1 +; GFX9-NEXT: v_mul_hi_u32 v4, s10, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v0, v3, vcc +; GFX9-NEXT: v_mul_hi_u32 v4, s11, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v4, v4, v2 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2] ; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s10, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v5, v[1:2] -; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v5, s9 ; GFX9-NEXT: v_subb_co_u32_e64 v2, s[0:1], v6, v1, vcc ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2 ; GFX9-NEXT: v_sub_u32_e32 v1, s11, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v2 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v8, s[0:1] +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[0:1] ; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s8, v0 ; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v1, vcc -; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v5 -; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v3 +; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v4, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v9 ; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v8 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v5, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v9 -; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s8, v8 +; GFX9-NEXT: v_subrev_co_u32_e32 v5, vcc, s8, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[0:1] ; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v10 ; GFX9-NEXT: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc @@ -435,26 +407,27 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v6, v0, v6, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v10, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v11, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v5, v0, v5, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v1, s[0:1] -; GFX9-NEXT: s_xor_b64 s[0:1], s[2:3], s[12:13] -; GFX9-NEXT: v_xor_b32_e32 v0, s0, v5 -; GFX9-NEXT: v_xor_b32_e32 v1, s1, v3 +; GFX9-NEXT: s_xor_b64 s[0:1], s[12:13], s[14:15] +; GFX9-NEXT: v_xor_b32_e32 v0, s0, v3 +; GFX9-NEXT: v_xor_b32_e32 v1, s1, v4 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s0, v0 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-NEXT: v_xor_b32_e32 v3, s2, v6 -; GFX9-NEXT: v_xor_b32_e32 v5, s2, v2 -; GFX9-NEXT: v_mov_b32_e32 v6, s2 -; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s2, v3 -; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v5, v6, vcc -; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] -; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] +; GFX9-NEXT: v_xor_b32_e32 v3, s12, v5 +; GFX9-NEXT: v_mov_b32_e32 v6, 0 +; GFX9-NEXT: v_xor_b32_e32 v4, s12, v2 +; GFX9-NEXT: v_mov_b32_e32 v5, s12 +; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s12, v3 +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v5, vcc +; GFX9-NEXT: global_store_dwordx2 v6, v[0:1], s[4:5] +; GFX9-NEXT: global_store_dwordx2 v6, v[2:3], s[6:7] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: sdivrem_i64: @@ -470,10 +443,11 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: s_addc_u32 s9, s11, s12 ; GFX10-NEXT: s_mov_b32 s3, s2 ; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] -; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX10-NEXT: s_xor_b64 s[10:11], s[0:1], s[2:3] ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s9 ; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s8 -; GFX10-NEXT: s_sub_u32 s10, 0, s8 +; GFX10-NEXT: s_sub_u32 s14, 0, s8 +; GFX10-NEXT: s_subb_u32 s15, 0, s9 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -484,85 +458,72 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v2 ; GFX10-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s11, s10, v3, 0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s11, s10, v4, v[1:2] -; GFX10-NEXT: s_subb_u32 s11, 0, s9 -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s14, s11, v3, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s14, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s14, v4, v[1:2] +; GFX10-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s15, v3, v[1:2] ; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0 -; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1 ; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX10-NEXT: v_add_co_u32 v2, s14, v2, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s14 -; GFX10-NEXT: v_add_co_u32 v6, s14, v7, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s14 -; GFX10-NEXT: v_add_co_u32 v0, s14, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s14 -; GFX10-NEXT: v_add_co_u32 v2, s14, v6, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s14 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 -; GFX10-NEXT: v_add_co_u32 v0, s14, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s14 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v5 +; GFX10-NEXT: v_add_co_u32 v0, s0, v7, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v6, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v8, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s16, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v5, vcc_lo ; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v3, v0 -; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v4, v1, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s14, s10, v3, 0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s10, s10, v4, v[1:2] -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s10, s11, v3, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s14, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s14, v4, v[1:2] +; GFX10-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s15, v3, v[1:2] ; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0 -; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1 ; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX10-NEXT: v_add_co_u32 v2, s10, v2, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v6, s10, v7, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v0, s10, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v2, s10, v6, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s10 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 -; GFX10-NEXT: v_add_co_u32 v0, s10, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s10 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v5 +; GFX10-NEXT: v_add_co_u32 v0, s0, v7, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v6, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v8, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s14, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v5, vcc_lo ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v3, v0 -; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1 -; GFX10-NEXT: v_mul_lo_u32 v2, s1, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX10-NEXT: v_mul_lo_u32 v2, s11, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v4, v1, vcc_lo -; GFX10-NEXT: v_mul_hi_u32 v4, s0, v0 -; GFX10-NEXT: v_mul_hi_u32 v0, s1, v0 -; GFX10-NEXT: v_mul_lo_u32 v3, s0, v1 -; GFX10-NEXT: v_mul_lo_u32 v5, s1, v1 -; GFX10-NEXT: v_add_co_u32 v2, s10, v2, v3 -; GFX10-NEXT: v_mul_hi_u32 v3, s0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v2, s10, v2, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v0, s10, v5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s10 -; GFX10-NEXT: v_add_nc_u32_e32 v2, v6, v2 -; GFX10-NEXT: v_add_co_u32 v0, s10, v0, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v5, s10, v0, v2 -; GFX10-NEXT: v_mul_hi_u32 v2, s1, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s10 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s10, s8, v5, 0 -; GFX10-NEXT: v_add3_u32 v3, v3, v6, v2 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s10, s8, v3, v[1:2] -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s10, s9, v5, v[1:2] -; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v5, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v3, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, s0, v0 -; GFX10-NEXT: v_sub_nc_u32_e32 v6, s1, v1 -; GFX10-NEXT: v_sub_co_ci_u32_e64 v1, s0, s1, v1, vcc_lo +; GFX10-NEXT: v_mul_hi_u32 v4, s10, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, s11, v0 +; GFX10-NEXT: v_mul_lo_u32 v3, s10, v1 +; GFX10-NEXT: v_mul_lo_u32 v5, s11, v1 +; GFX10-NEXT: v_mul_hi_u32 v6, s10, v1 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v3 +; GFX10-NEXT: v_add_co_u32 v0, s0, v5, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v4, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v6, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s14, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, vcc_lo, v0, v2, s0 +; GFX10-NEXT: v_mul_hi_u32 v2, s11, v1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s8, v4, 0 +; GFX10-NEXT: v_add_nc_u32_e32 v3, v2, v3 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s8, v3, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s9, v4, v[1:2] +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v4, 1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v3, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, s10, v0 +; GFX10-NEXT: v_sub_nc_u32_e32 v6, s11, v1 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v1, s0, s11, v1, vcc_lo ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v6, vcc_lo, s9, v6, vcc_lo ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s8, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc_lo @@ -576,7 +537,7 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: v_cmp_le_u32_e64 s0, s9, v9 ; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0 ; GFX10-NEXT: v_add_co_u32 v13, s0, v2, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v4, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v5, s0 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s9, v9 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v12, v11, s0 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s9, v1 @@ -586,12 +547,12 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v6, s0, 0, v6, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v13, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v7 -; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v14, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v7, v8, v10, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo ; GFX10-NEXT: s_xor_b64 s[8:9], s[2:3], s[12:13] -; GFX10-NEXT: v_cndmask_b32_e64 v2, v5, v2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v4, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v5, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v7, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v6, s0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 @@ -1272,25 +1233,25 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-LABEL: sdivrem_v2i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 +; GFX8-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x20 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_ashr_i32 s4, s13, 31 -; GFX8-NEXT: s_ashr_i32 s6, s1, 31 -; GFX8-NEXT: s_add_u32 s16, s12, s4 -; GFX8-NEXT: s_addc_u32 s17, s13, s4 -; GFX8-NEXT: s_add_u32 s0, s0, s6 -; GFX8-NEXT: s_mov_b32 s7, s6 -; GFX8-NEXT: s_addc_u32 s1, s1, s6 -; GFX8-NEXT: s_xor_b64 s[12:13], s[0:1], s[6:7] +; GFX8-NEXT: s_ashr_i32 s16, s13, 31 +; GFX8-NEXT: s_ashr_i32 s18, s5, 31 +; GFX8-NEXT: s_add_u32 s0, s12, s16 +; GFX8-NEXT: s_addc_u32 s1, s13, s16 +; GFX8-NEXT: s_add_u32 s2, s4, s18 +; GFX8-NEXT: s_mov_b32 s19, s18 +; GFX8-NEXT: s_addc_u32 s3, s5, s18 +; GFX8-NEXT: s_xor_b64 s[12:13], s[2:3], s[18:19] ; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s13 ; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s12 -; GFX8-NEXT: s_mov_b32 s5, s4 -; GFX8-NEXT: s_xor_b64 s[16:17], s[16:17], s[4:5] +; GFX8-NEXT: s_mov_b32 s17, s16 +; GFX8-NEXT: s_xor_b64 s[4:5], s[0:1], s[16:17] ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: s_sub_u32 s18, 0, s12 -; GFX8-NEXT: s_subb_u32 s19, 0, s13 +; GFX8-NEXT: s_sub_u32 s22, 0, s12 +; GFX8-NEXT: s_subb_u32 s23, 0, s13 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX8-NEXT: v_trunc_f32_e32 v2, v1 @@ -1298,10 +1259,10 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s22, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s22, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s23, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -1309,243 +1270,213 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; GFX8-NEXT: v_add_u32_e64 v0, s[0:1], v7, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX8-NEXT: v_addc_u32_e64 v2, s[20:21], 0, 0, vcc +; GFX8-NEXT: v_addc_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s22, v3, 0 +; GFX8-NEXT: s_xor_b64 s[20:21], s[16:17], s[18:19] +; GFX8-NEXT: s_ashr_i32 s18, s7, 31 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s22, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] +; GFX8-NEXT: s_mov_b32 s19, s18 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s23, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 -; GFX8-NEXT: s_xor_b64 s[18:19], s[4:5], s[6:7] -; GFX8-NEXT: s_ashr_i32 s6, s15, 31 -; GFX8-NEXT: s_mov_b32 s7, s6 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 -; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc -; GFX8-NEXT: v_mul_lo_u32 v2, s17, v0 -; GFX8-NEXT: v_mul_lo_u32 v3, s16, v1 -; GFX8-NEXT: v_mul_hi_u32 v4, s16, v0 -; GFX8-NEXT: v_mul_hi_u32 v0, s17, v0 -; GFX8-NEXT: v_mul_hi_u32 v5, s17, v1 +; GFX8-NEXT: v_mul_lo_u32 v2, s5, v0 +; GFX8-NEXT: v_mul_lo_u32 v3, s4, v1 +; GFX8-NEXT: v_mul_hi_u32 v4, s4, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX8-NEXT: v_mov_b32_e32 v6, s5 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v4, s17, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_mul_hi_u32 v3, s16, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v0, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v4, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v3, v[1:2] -; GFX8-NEXT: v_mov_b32_e32 v6, s17 -; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s16, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v4, v[1:2] +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, s5, v1 +; GFX8-NEXT: v_mul_hi_u32 v4, s4, v1 +; GFX8-NEXT: v_addc_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v0, v3, vcc +; GFX8-NEXT: v_mul_hi_u32 v4, s5, v1 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v3, 0 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v2 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v4, v[1:2] +; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s4, v0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v3, v[1:2] ; GFX8-NEXT: v_mov_b32_e32 v5, s13 -; GFX8-NEXT: s_ashr_i32 s16, s3, 31 +; GFX8-NEXT: s_ashr_i32 s4, s15, 31 ; GFX8-NEXT: v_subb_u32_e64 v6, s[0:1], v6, v1, vcc -; GFX8-NEXT: v_sub_u32_e64 v0, s[0:1], s17, v1 +; GFX8-NEXT: v_sub_u32_e64 v0, s[0:1], s5, v1 ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v6 ; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v7 ; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v6 -; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, s12, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[0:1] -; GFX8-NEXT: v_subbrev_u32_e64 v9, s[0:1], 0, v0, vcc -; GFX8-NEXT: v_add_u32_e64 v1, s[0:1], 1, v4 -; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v8 +; GFX8-NEXT: v_subrev_u32_e32 v9, vcc, s12, v7 +; GFX8-NEXT: v_cndmask_b32_e64 v8, v1, v2, s[0:1] +; GFX8-NEXT: v_subbrev_u32_e64 v10, s[0:1], 0, v0, vcc +; GFX8-NEXT: v_add_u32_e64 v2, s[0:1], 1, v3 +; GFX8-NEXT: v_addc_u32_e64 v11, s[0:1], 0, v4, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v10 +; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v9 ; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] -; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v1 -; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v10, s[0:1] -; GFX8-NEXT: s_add_u32 s0, s14, s6 -; GFX8-NEXT: s_addc_u32 s1, s15, s6 -; GFX8-NEXT: s_add_u32 s2, s2, s16 -; GFX8-NEXT: s_mov_b32 s17, s16 -; GFX8-NEXT: s_addc_u32 s3, s3, s16 -; GFX8-NEXT: s_xor_b64 s[2:3], s[2:3], s[16:17] -; GFX8-NEXT: v_cvt_f32_u32_e32 v14, s3 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v10 +; GFX8-NEXT: v_cndmask_b32_e64 v12, v1, v12, s[0:1] +; GFX8-NEXT: v_add_u32_e64 v13, s[0:1], 1, v2 +; GFX8-NEXT: v_addc_u32_e64 v14, s[0:1], 0, v11, s[0:1] +; GFX8-NEXT: s_add_u32 s0, s14, s4 +; GFX8-NEXT: s_addc_u32 s1, s15, s4 +; GFX8-NEXT: s_add_u32 s2, s6, s18 +; GFX8-NEXT: s_addc_u32 s3, s7, s18 +; GFX8-NEXT: s_xor_b64 s[6:7], s[2:3], s[18:19] +; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s7 +; GFX8-NEXT: v_cvt_f32_u32_e32 v15, s6 ; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc -; GFX8-NEXT: v_cvt_f32_u32_e32 v5, s2 -; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, s12, v8 -; GFX8-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v0, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v14 -; GFX8-NEXT: v_add_f32_e32 v0, v0, v5 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; GFX8-NEXT: v_cndmask_b32_e32 v5, v1, v12, vcc -; GFX8-NEXT: s_xor_b64 s[12:13], s[0:1], s[6:7] -; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; GFX8-NEXT: v_add_f32_e32 v1, v1, v15 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s12, v9 +; GFX8-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v0, vcc +; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 ; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GFX8-NEXT: v_trunc_f32_e32 v11, v1 -; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v11 +; GFX8-NEXT: v_trunc_f32_e32 v16, v1 +; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v16 ; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 -; GFX8-NEXT: v_cvt_u32_f32_e32 v12, v0 -; GFX8-NEXT: s_sub_u32 s5, 0, s2 -; GFX8-NEXT: s_subb_u32 s20, 0, s3 -; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v12, 0 -; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[0:1] -; GFX8-NEXT: v_cvt_u32_f32_e32 v5, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v10, v3, v10, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v15, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[0:1] -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[14:15], s5, v5, v[1:2] -; GFX8-NEXT: v_mul_lo_u32 v3, v5, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[14:15], s20, v12, v[1:2] -; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v16, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v2, s[0:1] -; GFX8-NEXT: v_mul_lo_u32 v8, v12, v1 -; GFX8-NEXT: v_mul_hi_u32 v2, v12, v0 -; GFX8-NEXT: v_mul_hi_u32 v0, v5, v0 -; GFX8-NEXT: v_xor_b32_e32 v9, s19, v10 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v3, v5, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2 -; GFX8-NEXT: v_mul_hi_u32 v8, v12, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v8 -; GFX8-NEXT: v_mul_hi_u32 v1, v5, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 +; GFX8-NEXT: v_cvt_u32_f32_e32 v17, v0 +; GFX8-NEXT: s_mov_b32 s5, s4 +; GFX8-NEXT: s_xor_b64 s[12:13], s[0:1], s[4:5] +; GFX8-NEXT: s_sub_u32 s14, 0, s6 +; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v17, 0 +; GFX8-NEXT: v_cndmask_b32_e32 v12, v2, v13, vcc +; GFX8-NEXT: v_cvt_u32_f32_e32 v13, v16 +; GFX8-NEXT: s_subb_u32 s15, 0, s7 +; GFX8-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v9, v10, v15, vcc +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v13, v[1:2] +; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v8 +; GFX8-NEXT: v_cndmask_b32_e64 v8, v3, v12, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[2:3], s15, v17, v[1:2] +; GFX8-NEXT: v_mul_lo_u32 v2, v13, v0 +; GFX8-NEXT: v_mul_hi_u32 v10, v17, v0 +; GFX8-NEXT: v_mul_lo_u32 v3, v17, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc +; GFX8-NEXT: v_mul_hi_u32 v0, v13, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v11, s[0:1] +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v10, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, v13, v1 +; GFX8-NEXT: v_mul_hi_u32 v10, v17, v1 +; GFX8-NEXT: v_addc_u32_e64 v3, s[2:3], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 +; GFX8-NEXT: v_mul_hi_u32 v1, v13, v1 +; GFX8-NEXT: v_addc_u32_e64 v0, s[2:3], v0, v10, vcc +; GFX8-NEXT: v_addc_u32_e64 v2, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v12, v0 -; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s5, v8, 0 -; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v1, vcc -; GFX8-NEXT: v_xor_b32_e32 v1, s18, v4 +; GFX8-NEXT: v_add_u32_e32 v10, vcc, v17, v0 +; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[2:3], s14, v10, 0 +; GFX8-NEXT: v_addc_u32_e32 v11, vcc, v13, v1, vcc ; GFX8-NEXT: v_mov_b32_e32 v0, v3 -; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s5, v5, v[0:1] -; GFX8-NEXT: v_mov_b32_e32 v10, s19 -; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s18, v1 -; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s20, v8, v[3:4] -; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v9, v10, vcc -; GFX8-NEXT: v_xor_b32_e32 v4, s4, v7 -; GFX8-NEXT: v_mul_lo_u32 v7, v5, v2 -; GFX8-NEXT: v_mul_lo_u32 v9, v8, v3 -; GFX8-NEXT: v_mul_hi_u32 v11, v8, v2 -; GFX8-NEXT: v_mul_hi_u32 v2, v5, v2 -; GFX8-NEXT: v_xor_b32_e32 v6, s4, v6 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v11, v5, v3 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 -; GFX8-NEXT: v_mul_hi_u32 v9, v8, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v11, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v9, vcc, v11, v9 -; GFX8-NEXT: v_mul_hi_u32 v3, v5, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2 -; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v5, v3, vcc -; GFX8-NEXT: v_mov_b32_e32 v10, s4 -; GFX8-NEXT: v_mul_lo_u32 v7, s13, v2 -; GFX8-NEXT: v_mul_lo_u32 v8, s12, v3 -; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s4, v4 -; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v6, v10, vcc -; GFX8-NEXT: v_mul_hi_u32 v6, s12, v2 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v7, s13, v3 +; GFX8-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v11, v[0:1] +; GFX8-NEXT: v_xor_b32_e32 v7, s20, v8 +; GFX8-NEXT: v_xor_b32_e32 v8, s21, v4 +; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s15, v10, v[0:1] +; GFX8-NEXT: v_mov_b32_e32 v9, s21 +; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s20, v7 +; GFX8-NEXT: v_xor_b32_e32 v4, s16, v5 +; GFX8-NEXT: v_mul_lo_u32 v5, v11, v2 +; GFX8-NEXT: v_mul_lo_u32 v7, v10, v3 +; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v8, v9, vcc +; GFX8-NEXT: v_mul_hi_u32 v8, v10, v2 +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v7 +; GFX8-NEXT: v_mul_hi_u32 v2, v11, v2 +; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc +; GFX8-NEXT: v_mul_lo_u32 v5, v11, v3 +; GFX8-NEXT: v_mul_hi_u32 v8, v10, v3 +; GFX8-NEXT: v_addc_u32_e64 v7, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_mul_hi_u32 v3, v11, v3 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], v2, v8, vcc +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v7, vcc +; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v10, v2 +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v11, v3, vcc +; GFX8-NEXT: v_mul_lo_u32 v5, s13, v2 +; GFX8-NEXT: v_mul_lo_u32 v7, s12, v3 +; GFX8-NEXT: v_mul_hi_u32 v9, s12, v2 ; GFX8-NEXT: v_mul_hi_u32 v2, s13, v2 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6 -; GFX8-NEXT: v_mul_hi_u32 v8, s12, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v7, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v2, v6 -; GFX8-NEXT: v_mul_hi_u32 v9, s13, v3 -; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v8, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 -; GFX8-NEXT: v_add_u32_e32 v9, vcc, v9, v6 -; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s2, v9, v[3:4] +; GFX8-NEXT: v_mul_hi_u32 v10, s13, v3 +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v7 +; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; GFX8-NEXT: v_mul_lo_u32 v5, s13, v3 +; GFX8-NEXT: v_mul_hi_u32 v9, s12, v3 +; GFX8-NEXT: v_addc_u32_e64 v7, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], v2, v9, vcc +; GFX8-NEXT: v_addc_u32_e32 v9, vcc, v2, v7, vcc +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s6, v9, 0 +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, 0, v5, vcc +; GFX8-NEXT: v_xor_b32_e32 v6, s16, v6 +; GFX8-NEXT: v_mov_b32_e32 v8, s16 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s16, v4 +; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v6, v8, vcc +; GFX8-NEXT: v_add_u32_e32 v8, vcc, v10, v7 +; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s6, v8, v[3:4] ; GFX8-NEXT: v_mov_b32_e32 v10, s13 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s12, v2 -; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s3, v8, v[6:7] -; GFX8-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s7, v9, v[6:7] +; GFX8-NEXT: v_mov_b32_e32 v3, s7 ; GFX8-NEXT: v_subb_u32_e64 v7, s[0:1], v10, v6, vcc ; GFX8-NEXT: v_sub_u32_e64 v6, s[0:1], s13, v6 -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v7 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v7 ; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v2 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v2 ; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v7 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s7, v7 ; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v6, v3, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[0:1] -; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, s2, v2 +; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, s6, v2 ; GFX8-NEXT: v_subbrev_u32_e64 v12, s[0:1], 0, v6, vcc -; GFX8-NEXT: v_add_u32_e64 v13, s[0:1], 1, v8 -; GFX8-NEXT: v_addc_u32_e64 v14, s[0:1], 0, v9, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v12 +; GFX8-NEXT: v_add_u32_e64 v13, s[0:1], 1, v9 +; GFX8-NEXT: v_addc_u32_e64 v14, s[0:1], 0, v8, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v12 ; GFX8-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v11 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v11 ; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v6, v3, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v12 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s2, v11 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s7, v12 +; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s6, v11 ; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v16, s[0:1] ; GFX8-NEXT: v_add_u32_e64 v16, s[0:1], 1, v13 ; GFX8-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc @@ -1556,20 +1487,20 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v10 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v11, v6, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, v13, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v14, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v13, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, v14, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v6, v2, v6, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[0:1] -; GFX8-NEXT: s_xor_b64 s[0:1], s[6:7], s[16:17] -; GFX8-NEXT: v_xor_b32_e32 v2, s0, v8 -; GFX8-NEXT: v_xor_b32_e32 v3, s1, v9 +; GFX8-NEXT: s_xor_b64 s[0:1], s[4:5], s[18:19] +; GFX8-NEXT: v_xor_b32_e32 v2, s0, v9 +; GFX8-NEXT: v_xor_b32_e32 v3, s1, v8 ; GFX8-NEXT: v_mov_b32_e32 v8, s1 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s0, v2 ; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc -; GFX8-NEXT: v_xor_b32_e32 v6, s6, v6 -; GFX8-NEXT: v_xor_b32_e32 v7, s6, v7 -; GFX8-NEXT: v_mov_b32_e32 v8, s6 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s6, v6 +; GFX8-NEXT: v_xor_b32_e32 v6, s4, v6 +; GFX8-NEXT: v_xor_b32_e32 v7, s4, v7 +; GFX8-NEXT: v_mov_b32_e32 v8, s4 +; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s4, v6 ; GFX8-NEXT: v_subb_u32_e32 v7, vcc, v7, v8, vcc ; GFX8-NEXT: v_mov_b32_e32 v8, s8 ; GFX8-NEXT: v_mov_b32_e32 v9, s9 @@ -1583,25 +1514,25 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-LABEL: sdivrem_v2i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 +; GFX9-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x20 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_ashr_i32 s4, s13, 31 -; GFX9-NEXT: s_ashr_i32 s6, s1, 31 -; GFX9-NEXT: s_add_u32 s16, s12, s4 -; GFX9-NEXT: s_addc_u32 s17, s13, s4 -; GFX9-NEXT: s_add_u32 s0, s0, s6 +; GFX9-NEXT: s_ashr_i32 s6, s17, 31 +; GFX9-NEXT: s_add_u32 s0, s12, s4 +; GFX9-NEXT: s_addc_u32 s1, s13, s4 +; GFX9-NEXT: s_add_u32 s2, s16, s6 ; GFX9-NEXT: s_mov_b32 s7, s6 -; GFX9-NEXT: s_addc_u32 s1, s1, s6 -; GFX9-NEXT: s_xor_b64 s[12:13], s[0:1], s[6:7] +; GFX9-NEXT: s_addc_u32 s3, s17, s6 +; GFX9-NEXT: s_xor_b64 s[12:13], s[2:3], s[6:7] ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s13 ; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s12 ; GFX9-NEXT: s_mov_b32 s5, s4 -; GFX9-NEXT: s_xor_b64 s[16:17], s[16:17], s[4:5] +; GFX9-NEXT: s_xor_b64 s[16:17], s[0:1], s[4:5] ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_sub_u32 s18, 0, s12 -; GFX9-NEXT: s_subb_u32 s19, 0, s13 +; GFX9-NEXT: s_sub_u32 s22, 0, s12 +; GFX9-NEXT: s_subb_u32 s23, 0, s13 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v2, v1 @@ -1609,10 +1540,10 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s22, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s22, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s23, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -1620,239 +1551,214 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc +; GFX9-NEXT: v_add_co_u32_e64 v0, s[0:1], v7, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[20:21], 0, 0, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0 -; GFX9-NEXT: v_mov_b32_e32 v7, s13 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s22, v3, 0 +; GFX9-NEXT: s_xor_b64 s[20:21], s[4:5], s[6:7] +; GFX9-NEXT: s_ashr_i32 s6, s15, 31 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s22, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2] +; GFX9-NEXT: s_mov_b32 s7, s6 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s23, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 -; GFX9-NEXT: s_xor_b64 s[18:19], s[4:5], s[6:7] -; GFX9-NEXT: s_ashr_i32 s6, s15, 31 -; GFX9-NEXT: s_mov_b32 s7, s6 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 -; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v5, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, s17, v0 ; GFX9-NEXT: v_mul_lo_u32 v3, s16, v1 ; GFX9-NEXT: v_mul_hi_u32 v4, s16, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s17, v0 -; GFX9-NEXT: v_mul_hi_u32 v6, s17, v1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, s17, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 -; GFX9-NEXT: v_mul_hi_u32 v3, s16, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v5, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_add3_u32 v4, v3, v0, v6 -; GFX9-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s12, v4, v[0:1] ; GFX9-NEXT: v_mov_b32_e32 v6, s17 -; GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, s16, v1 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s13, v5, v[2:3] -; GFX9-NEXT: s_ashr_i32 s16, s3, 31 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: v_subb_co_u32_e64 v6, s[0:1], v6, v2, vcc -; GFX9-NEXT: v_sub_u32_e32 v1, s17, v2 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s17, v1 +; GFX9-NEXT: v_mul_hi_u32 v4, s16, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v0, v3, vcc +; GFX9-NEXT: v_mul_hi_u32 v3, s17, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v4, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v5, v3, v2 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v5, v[1:2] +; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, s16, v0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v4, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v3, s13 +; GFX9-NEXT: s_ashr_i32 s16, s19, 31 +; GFX9-NEXT: v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc +; GFX9-NEXT: v_sub_u32_e32 v1, s17, v1 ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v6 ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v8 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v7 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v6 -; GFX9-NEXT: v_subrev_co_u32_e32 v10, vcc, s12, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v9, v2, v3, s[0:1] -; GFX9-NEXT: v_subbrev_co_u32_e64 v11, s[0:1], 0, v1, vcc -; GFX9-NEXT: v_add_co_u32_e64 v3, s[0:1], 1, v5 -; GFX9-NEXT: v_addc_co_u32_e64 v12, s[0:1], 0, v4, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v11 +; GFX9-NEXT: v_subrev_co_u32_e32 v9, vcc, s12, v7 +; GFX9-NEXT: v_cndmask_b32_e64 v8, v2, v8, s[0:1] +; GFX9-NEXT: v_subbrev_co_u32_e64 v10, s[0:1], 0, v1, vcc +; GFX9-NEXT: v_add_co_u32_e64 v11, s[0:1], 1, v4 +; GFX9-NEXT: v_addc_co_u32_e64 v12, s[0:1], 0, v5, s[0:1] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v10 ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v10 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v9 ; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v11 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v10 ; GFX9-NEXT: v_cndmask_b32_e64 v13, v2, v13, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v3 +; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v11 ; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v12, s[0:1] ; GFX9-NEXT: s_add_u32 s0, s14, s6 ; GFX9-NEXT: s_addc_u32 s1, s15, s6 -; GFX9-NEXT: s_add_u32 s2, s2, s16 +; GFX9-NEXT: s_add_u32 s2, s18, s16 ; GFX9-NEXT: s_mov_b32 s17, s16 -; GFX9-NEXT: s_addc_u32 s3, s3, s16 -; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], s[16:17] -; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s3 -; GFX9-NEXT: v_cvt_f32_u32_e32 v16, s2 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc +; GFX9-NEXT: s_addc_u32 s3, s19, s16 +; GFX9-NEXT: s_xor_b64 s[14:15], s[2:3], s[16:17] +; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s15 +; GFX9-NEXT: v_cvt_f32_u32_e32 v16, s14 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 ; GFX9-NEXT: v_add_f32_e32 v2, v2, v16 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s12, v10 -; GFX9-NEXT: v_subbrev_co_u32_e32 v16, vcc, 0, v1, vcc +; GFX9-NEXT: v_subrev_co_u32_e32 v16, vcc, s12, v9 +; GFX9-NEXT: v_subbrev_co_u32_e32 v17, vcc, 0, v1, vcc ; GFX9-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v2 ; GFX9-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 -; GFX9-NEXT: v_trunc_f32_e32 v17, v2 -; GFX9-NEXT: v_mul_f32_e32 v2, 0xcf800000, v17 +; GFX9-NEXT: v_trunc_f32_e32 v3, v2 +; GFX9-NEXT: v_mul_f32_e32 v2, 0xcf800000, v3 ; GFX9-NEXT: v_add_f32_e32 v1, v2, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v18, v1 ; GFX9-NEXT: s_xor_b64 s[12:13], s[0:1], s[6:7] -; GFX9-NEXT: s_sub_u32 s5, 0, s2 +; GFX9-NEXT: s_sub_u32 s5, 0, s14 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v18, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v13, v3, v14, vcc -; GFX9-NEXT: v_cvt_u32_f32_e32 v14, v17 -; GFX9-NEXT: s_subb_u32 s20, 0, s3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v13, v3 +; GFX9-NEXT: s_subb_u32 s18, 0, s15 +; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v12, v12, v15, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s5, v14, v[2:3] -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v9, v4, v12, s[0:1] -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[14:15], s20, v18, v[2:3] -; GFX9-NEXT: v_mul_lo_u32 v3, v14, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v10, v11, v16, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, v18, v2 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s5, v13, v[2:3] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v8 +; GFX9-NEXT: v_cndmask_b32_e64 v8, v4, v11, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[2:3], s18, v18, v[2:3] +; GFX9-NEXT: v_mul_lo_u32 v3, v13, v1 ; GFX9-NEXT: v_mul_hi_u32 v11, v18, v1 -; GFX9-NEXT: v_mul_hi_u32 v1, v14, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v4, v18, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v16, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v17, vcc +; GFX9-NEXT: v_mul_hi_u32 v1, v13, v1 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v11, v14, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_mul_hi_u32 v4, v18, v2 -; GFX9-NEXT: v_mul_hi_u32 v2, v14, v2 -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v11, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v1, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v1, v3 -; GFX9-NEXT: v_add_u32_e32 v4, v11, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v11, vcc +; GFX9-NEXT: v_mul_lo_u32 v3, v13, v2 +; GFX9-NEXT: v_mul_hi_u32 v11, v18, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v4, s[2:3], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v1, s[2:3], v1, v11, vcc +; GFX9-NEXT: v_mul_hi_u32 v2, v13, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v18, v1 -; GFX9-NEXT: v_add3_u32 v2, v4, v3, v2 -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[14:15], s5, v11, 0 -; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v14, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v2, v2, v3 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[2:3], s5, v11, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v13, v2, vcc ; GFX9-NEXT: v_mov_b32_e32 v1, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v10, s[0:1] ; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v12, v[1:2] -; GFX9-NEXT: v_xor_b32_e32 v8, s18, v5 -; GFX9-NEXT: v_xor_b32_e32 v9, s19, v9 -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s20, v11, v[1:2] -; GFX9-NEXT: v_mov_b32_e32 v10, s19 -; GFX9-NEXT: v_subrev_co_u32_e32 v1, vcc, s18, v8 +; GFX9-NEXT: v_xor_b32_e32 v9, s21, v5 +; GFX9-NEXT: v_xor_b32_e32 v8, s20, v8 +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s18, v11, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v10, s21 +; GFX9-NEXT: v_subrev_co_u32_e32 v1, vcc, s20, v8 ; GFX9-NEXT: v_xor_b32_e32 v5, s4, v7 ; GFX9-NEXT: v_mul_lo_u32 v7, v12, v3 ; GFX9-NEXT: v_mul_lo_u32 v8, v11, v4 ; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v9, v10, vcc ; GFX9-NEXT: v_mul_hi_u32 v9, v11, v3 ; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v9, v12, v4 ; GFX9-NEXT: v_mul_hi_u32 v3, v12, v3 -; GFX9-NEXT: v_add_u32_e32 v7, v8, v7 -; GFX9-NEXT: v_mul_hi_u32 v8, v11, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v9, vcc +; GFX9-NEXT: v_mul_lo_u32 v7, v12, v4 +; GFX9-NEXT: v_mul_hi_u32 v9, v11, v4 +; GFX9-NEXT: v_addc_co_u32_e64 v8, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v7, v3 ; GFX9-NEXT: v_mul_hi_u32 v4, v12, v4 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v9, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v7 -; GFX9-NEXT: v_add_u32_e32 v8, v9, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v4, v8, v7, v4 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], v3, v9, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v7, vcc +; GFX9-NEXT: v_add_u32_e32 v4, v4, v7 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v11, v3 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v12, v4, vcc ; GFX9-NEXT: v_mul_lo_u32 v7, s13, v3 ; GFX9-NEXT: v_mul_lo_u32 v8, s12, v4 ; GFX9-NEXT: v_mul_hi_u32 v10, s12, v3 ; GFX9-NEXT: v_mul_hi_u32 v3, s13, v3 -; GFX9-NEXT: v_mul_hi_u32 v12, s13, v4 -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v10 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v10, s13, v4 -; GFX9-NEXT: v_add_u32_e32 v7, v8, v7 -; GFX9-NEXT: v_mul_hi_u32 v8, s12, v4 ; GFX9-NEXT: v_xor_b32_e32 v6, s4, v6 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v10, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v3, v7 -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v11, 0 +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8 +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v10, vcc +; GFX9-NEXT: v_mul_lo_u32 v7, s13, v4 +; GFX9-NEXT: v_mul_hi_u32 v10, s12, v4 +; GFX9-NEXT: v_addc_co_u32_e64 v8, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v7, v3 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], v3, v10, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v10, vcc, v3, v8, vcc +; GFX9-NEXT: v_mul_hi_u32 v8, s13, v4 +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s14, v10, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v7, vcc ; GFX9-NEXT: v_mov_b32_e32 v9, s4 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GFX9-NEXT: v_subrev_co_u32_e32 v5, vcc, s4, v5 -; GFX9-NEXT: v_add_u32_e32 v8, v10, v8 ; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v9, vcc -; GFX9-NEXT: v_add3_u32 v9, v8, v7, v12 -; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s2, v9, v[4:5] -; GFX9-NEXT: v_mov_b32_e32 v10, s13 +; GFX9-NEXT: v_add_u32_e32 v9, v8, v7 +; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s14, v9, v[4:5] +; GFX9-NEXT: v_mov_b32_e32 v11, s13 ; GFX9-NEXT: v_sub_co_u32_e32 v3, vcc, s12, v3 -; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s3, v11, v[7:8] -; GFX9-NEXT: v_mov_b32_e32 v4, s3 -; GFX9-NEXT: v_subb_co_u32_e64 v8, s[0:1], v10, v7, vcc -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v8 +; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s15, v10, v[7:8] +; GFX9-NEXT: v_mov_b32_e32 v4, s15 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_subb_co_u32_e64 v8, s[0:1], v11, v7, vcc +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v8 ; GFX9-NEXT: v_sub_u32_e32 v7, s13, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v3 ; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v8 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v8 ; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v12, s[0:1] -; GFX9-NEXT: v_subrev_co_u32_e32 v12, vcc, s2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] +; GFX9-NEXT: v_subrev_co_u32_e32 v12, vcc, s14, v3 ; GFX9-NEXT: v_subbrev_co_u32_e64 v13, s[0:1], 0, v7, vcc -; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v11 +; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v10 ; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v9, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v13 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v13 ; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v12 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v12 ; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v7, v4, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v13 -; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s2, v12 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v13 +; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s14, v12 ; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[0:1] ; GFX9-NEXT: v_add_co_u32_e64 v17, s[0:1], 1, v14 ; GFX9-NEXT: v_subbrev_co_u32_e32 v4, vcc, 0, v4, vcc @@ -1860,10 +1766,10 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: v_cndmask_b32_e32 v14, v14, v17, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v15, v15, v18, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v10 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v11 ; GFX9-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v13, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v10, v11, v14, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v14, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v15, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v7, v3, v7, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v4, s[0:1] @@ -1897,26 +1803,26 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX10-NEXT: s_addc_u32 s1, s1, s16 ; GFX10-NEXT: s_mov_b32 s5, s4 ; GFX10-NEXT: s_xor_b64 s[6:7], s[0:1], s[16:17] -; GFX10-NEXT: s_xor_b64 s[0:1], s[12:13], s[4:5] +; GFX10-NEXT: s_xor_b64 s[12:13], s[12:13], s[4:5] ; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s7 -; GFX10-NEXT: s_sub_u32 s21, 0, s6 -; GFX10-NEXT: s_subb_u32 s20, 0, s7 -; GFX10-NEXT: s_ashr_i32 s12, s15, 31 -; GFX10-NEXT: s_xor_b64 s[18:19], s[4:5], s[16:17] -; GFX10-NEXT: s_ashr_i32 s16, s3, 31 -; GFX10-NEXT: s_add_u32 s14, s14, s12 -; GFX10-NEXT: s_addc_u32 s15, s15, s12 +; GFX10-NEXT: s_sub_u32 s25, 0, s6 +; GFX10-NEXT: s_subb_u32 s24, 0, s7 +; GFX10-NEXT: s_xor_b64 s[20:21], s[4:5], s[16:17] +; GFX10-NEXT: s_ashr_i32 s16, s15, 31 +; GFX10-NEXT: s_ashr_i32 s18, s3, 31 +; GFX10-NEXT: s_add_u32 s0, s14, s16 +; GFX10-NEXT: s_addc_u32 s1, s15, s16 ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s6 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 -; GFX10-NEXT: s_add_u32 s2, s2, s16 +; GFX10-NEXT: s_add_u32 s2, s2, s18 +; GFX10-NEXT: s_mov_b32 s19, s18 +; GFX10-NEXT: s_addc_u32 s3, s3, s18 ; GFX10-NEXT: s_mov_b32 s17, s16 -; GFX10-NEXT: s_addc_u32 s3, s3, s16 -; GFX10-NEXT: s_mov_b32 s13, s12 -; GFX10-NEXT: s_xor_b64 s[2:3], s[2:3], s[16:17] +; GFX10-NEXT: s_xor_b64 s[14:15], s[2:3], s[18:19] ; GFX10-NEXT: v_add_f32_e32 v0, v1, v0 -; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s3 -; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s2 -; GFX10-NEXT: s_xor_b64 s[14:15], s[14:15], s[12:13] +; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s15 +; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s14 +; GFX10-NEXT: s_xor_b64 s[22:23], s[0:1], s[16:17] ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 ; GFX10-NEXT: v_add_f32_e32 v1, v1, v2 @@ -1927,255 +1833,230 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX10-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v1 ; GFX10-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 ; GFX10-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 -; GFX10-NEXT: v_cvt_u32_f32_e32 v9, v2 +; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v2 ; GFX10-NEXT: v_add_f32_e32 v0, v1, v0 -; GFX10-NEXT: v_trunc_f32_e32 v6, v4 -; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v0 -; GFX10-NEXT: v_mul_f32_e32 v4, 0xcf800000, v6 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s21, v7, 0 -; GFX10-NEXT: v_add_f32_e32 v3, v4, v3 -; GFX10-NEXT: s_sub_u32 s5, 0, s2 +; GFX10-NEXT: v_trunc_f32_e32 v4, v4 +; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v0 +; GFX10-NEXT: v_mul_f32_e32 v5, 0xcf800000, v4 +; GFX10-NEXT: v_cvt_u32_f32_e32 v9, v4 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s25, v6, 0 +; GFX10-NEXT: v_add_f32_e32 v3, v5, v3 +; GFX10-NEXT: s_sub_u32 s2, 0, s14 +; GFX10-NEXT: s_subb_u32 s3, 0, s15 ; GFX10-NEXT: v_cvt_u32_f32_e32 v8, v3 -; GFX10-NEXT: v_mul_hi_u32 v10, v9, v0 -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s22, s5, v8, 0 -; GFX10-NEXT: v_mad_u64_u32 v[4:5], s22, s21, v9, v[1:2] -; GFX10-NEXT: v_cvt_u32_f32_e32 v5, v6 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s25, v7, v[1:2] +; GFX10-NEXT: v_mul_lo_u32 v10, v7, v0 +; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s2, v8, 0 +; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s24, v6, v[1:2] ; GFX10-NEXT: v_mov_b32_e32 v1, v3 -; GFX10-NEXT: v_mul_hi_u32 v6, v7, v0 -; GFX10-NEXT: s_subb_u32 s22, 0, s3 +; GFX10-NEXT: v_mul_hi_u32 v3, v6, v0 +; GFX10-NEXT: v_mul_hi_u32 v5, v7, v0 +; GFX10-NEXT: v_mul_lo_u32 v11, v9, v2 ; GFX10-NEXT: v_mul_hi_u32 v12, v8, v2 -; GFX10-NEXT: v_mul_lo_u32 v11, v5, v2 -; GFX10-NEXT: v_mad_u64_u32 v[3:4], s23, s20, v7, v[4:5] -; GFX10-NEXT: v_mul_lo_u32 v4, v9, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s23, s5, v5, v[1:2] -; GFX10-NEXT: v_mul_hi_u32 v2, v5, v2 -; GFX10-NEXT: v_mul_lo_u32 v13, v7, v3 -; GFX10-NEXT: v_mul_lo_u32 v14, v9, v3 -; GFX10-NEXT: v_mul_hi_u32 v15, v7, v3 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s23, s22, v8, v[0:1] -; GFX10-NEXT: v_mul_hi_u32 v1, v9, v3 -; GFX10-NEXT: v_add_co_u32 v3, s23, v4, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s23 -; GFX10-NEXT: v_add_co_u32 v10, s23, v14, v10 -; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s23 -; GFX10-NEXT: v_mul_lo_u32 v14, v8, v0 -; GFX10-NEXT: v_add_co_u32 v3, s23, v3, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s23 -; GFX10-NEXT: v_add_co_u32 v6, s23, v10, v15 -; GFX10-NEXT: v_mul_lo_u32 v15, v5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s23 -; GFX10-NEXT: v_mul_hi_u32 v16, v8, v0 -; GFX10-NEXT: v_mul_hi_u32 v17, v5, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v4, v3 -; GFX10-NEXT: v_add_co_u32 v4, s23, v11, v14 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v13, v10 -; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s23 -; GFX10-NEXT: v_add_co_u32 v2, s23, v15, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s23 -; GFX10-NEXT: v_add_co_u32 v0, s23, v6, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s23 -; GFX10-NEXT: v_add_co_u32 v4, s23, v4, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s23 -; GFX10-NEXT: v_add_co_u32 v2, s23, v2, v16 -; GFX10-NEXT: v_add3_u32 v1, v3, v6, v1 -; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v7, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v10, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s23 -; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v9, v1, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s23, s21, v6, 0 -; GFX10-NEXT: v_add_co_u32 v2, s23, v2, v3 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v11, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s23 -; GFX10-NEXT: v_mov_b32_e32 v10, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v9, v[1:2] +; GFX10-NEXT: v_mul_lo_u32 v13, v6, v4 +; GFX10-NEXT: v_mul_lo_u32 v14, v7, v4 +; GFX10-NEXT: v_mul_hi_u32 v15, v6, v4 +; GFX10-NEXT: v_mul_hi_u32 v2, v9, v2 +; GFX10-NEXT: v_mul_hi_u32 v4, v7, v4 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s3, v8, v[0:1] +; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v10, v13 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: v_add_co_u32 v1, s0, v14, v5 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s5, 0, 0, vcc_lo +; GFX10-NEXT: v_mul_lo_u32 v10, v8, v0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s1, v1, v15, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_mul_lo_u32 v13, v9, v0 +; GFX10-NEXT: v_mul_hi_u32 v14, v8, v0 +; GFX10-NEXT: v_mul_hi_u32 v15, v9, v0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, vcc_lo, v1, v3, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v5, vcc_lo +; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v11, v10 +; GFX10-NEXT: v_add_co_u32 v2, s0, v13, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v4, v1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v12, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s1, v2, v14, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s5, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v6, v0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v7, v1, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, vcc_lo, v2, v3, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, s25, v6, 0 ; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v8, v2 -; GFX10-NEXT: v_mul_hi_u32 v11, v7, v0 -; GFX10-NEXT: v_add3_u32 v3, v4, v3, v17 -; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v3, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s23, s5, v8, 0 -; GFX10-NEXT: v_mad_u64_u32 v[4:5], s21, s21, v7, v[1:2] +; GFX10-NEXT: v_add_nc_u32_e32 v3, v15, v3 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s25, v7, v[1:2] +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v9, v3, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s2, v8, 0 +; GFX10-NEXT: v_mul_lo_u32 v10, v7, v0 +; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s24, v6, v[1:2] ; GFX10-NEXT: v_mov_b32_e32 v1, v3 -; GFX10-NEXT: v_mul_lo_u32 v12, v9, v2 -; GFX10-NEXT: v_mul_hi_u32 v13, v8, v2 -; GFX10-NEXT: v_mad_u64_u32 v[3:4], s20, s20, v6, v[4:5] -; GFX10-NEXT: v_mul_lo_u32 v4, v7, v0 -; GFX10-NEXT: v_mul_hi_u32 v5, v6, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s5, v9, v[1:2] +; GFX10-NEXT: v_mul_hi_u32 v3, v6, v0 +; GFX10-NEXT: v_mul_hi_u32 v5, v7, v0 +; GFX10-NEXT: v_mul_lo_u32 v11, v9, v2 +; GFX10-NEXT: v_mul_hi_u32 v12, v8, v2 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v9, v[1:2] +; GFX10-NEXT: v_mul_lo_u32 v13, v6, v4 +; GFX10-NEXT: v_mul_lo_u32 v14, v7, v4 +; GFX10-NEXT: v_mul_hi_u32 v15, v6, v4 +; GFX10-NEXT: v_mul_hi_u32 v4, v7, v4 ; GFX10-NEXT: v_mul_hi_u32 v2, v9, v2 -; GFX10-NEXT: v_mul_lo_u32 v14, v6, v3 -; GFX10-NEXT: v_mul_lo_u32 v15, v7, v3 -; GFX10-NEXT: v_mul_hi_u32 v16, v6, v3 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s22, v8, v[0:1] -; GFX10-NEXT: v_mul_hi_u32 v1, v7, v3 -; GFX10-NEXT: v_add_co_u32 v3, s5, v4, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v11, s5, v15, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v3, s5, v3, v5 -; GFX10-NEXT: v_mul_lo_u32 v15, v8, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v5, s5, v11, v16 -; GFX10-NEXT: v_mul_lo_u32 v16, v9, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s3, v8, v[0:1] +; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v10, v13 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: v_add_co_u32 v1, s0, v14, v5 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_mul_lo_u32 v10, v8, v0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s1, v1, v15, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_mul_lo_u32 v13, v9, v0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, vcc_lo, v1, v3, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v5, vcc_lo +; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v11, v10 +; GFX10-NEXT: v_mul_hi_u32 v14, v8, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3 -; GFX10-NEXT: v_mul_hi_u32 v17, v8, v0 +; GFX10-NEXT: v_add_co_u32 v2, s0, v13, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v5, v12, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v6, v1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v7, v3, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s1, v2, v14, s0 +; GFX10-NEXT: v_mul_lo_u32 v6, s13, v1 +; GFX10-NEXT: v_mul_lo_u32 v10, s12, v3 +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_mul_hi_u32 v7, s12, v1 +; GFX10-NEXT: v_mul_hi_u32 v1, s13, v1 +; GFX10-NEXT: v_mul_lo_u32 v11, s13, v3 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, vcc_lo, v2, v4, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v5, vcc_lo +; GFX10-NEXT: v_mul_hi_u32 v5, s12, v3 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v6, v10 +; GFX10-NEXT: v_add_co_u32 v1, s0, v11, v1 ; GFX10-NEXT: v_mul_hi_u32 v0, v9, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v14, v11 -; GFX10-NEXT: v_add_co_u32 v11, s5, v12, v15 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v2, s5, v16, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v3, s5, v5, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v11, s5, v11, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v2, s5, v2, v17 -; GFX10-NEXT: v_add3_u32 v1, v4, v5, v1 -; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v6, v3 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v12, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s5 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v7, v1, vcc_lo -; GFX10-NEXT: v_mul_lo_u32 v6, s1, v3 -; GFX10-NEXT: v_add_co_u32 v2, s5, v2, v4 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v14, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5 -; GFX10-NEXT: v_mul_lo_u32 v11, s0, v1 -; GFX10-NEXT: v_mul_hi_u32 v7, s0, v3 -; GFX10-NEXT: v_mul_hi_u32 v3, s1, v3 -; GFX10-NEXT: v_mul_lo_u32 v12, s1, v1 -; GFX10-NEXT: v_add3_u32 v0, v5, v4, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v6, v7, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s1, v1, v5, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s1, 0, 0, s1 +; GFX10-NEXT: v_add_nc_u32_e32 v4, v0, v4 +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, vcc_lo, v1, v5, s0 +; GFX10-NEXT: v_mul_hi_u32 v3, s13, v3 +; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v6, vcc_lo ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v8, v2 -; GFX10-NEXT: v_mul_hi_u32 v4, s0, v1 -; GFX10-NEXT: v_mul_hi_u32 v5, s1, v1 -; GFX10-NEXT: v_add_co_u32 v1, s5, v6, v11 -; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v9, v0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v3, s5, v12, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v1, s5, v1, v7 -; GFX10-NEXT: v_mul_lo_u32 v0, s15, v2 -; GFX10-NEXT: v_mul_lo_u32 v12, s14, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v3, s5, v3, v4 -; GFX10-NEXT: v_mul_hi_u32 v9, s14, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5 -; GFX10-NEXT: v_mul_hi_u32 v2, s15, v2 -; GFX10-NEXT: v_mul_lo_u32 v7, s15, v8 -; GFX10-NEXT: v_add_nc_u32_e32 v1, v6, v1 -; GFX10-NEXT: v_add_co_u32 v6, s5, v0, v12 -; GFX10-NEXT: v_mul_hi_u32 v13, s14, v8 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v11, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v12, s5, v3, v1 -; GFX10-NEXT: v_add_co_u32 v2, s20, v7, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s5 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s6, v12, 0 -; GFX10-NEXT: v_add_co_u32 v6, s5, v6, v9 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v9, s5, v2, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s20 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s5 -; GFX10-NEXT: v_add3_u32 v4, v4, v7, v5 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v11, v6 -; GFX10-NEXT: v_mul_hi_u32 v5, s15, v8 -; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v12, 1 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v2 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s5, s6, v4, v[1:2] -; GFX10-NEXT: v_add_co_u32 v6, s5, v9, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s5 -; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v4, vcc_lo -; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v7, 1 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s5, s7, v12, v[1:2] -; GFX10-NEXT: v_add3_u32 v5, v3, v9, v5 -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s5, s2, v6, 0 -; GFX10-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, 0, v8, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v14, vcc_lo, s0, v0 -; GFX10-NEXT: v_sub_nc_u32_e32 v9, s1, v1 -; GFX10-NEXT: v_sub_co_ci_u32_e64 v15, s0, s1, v1, vcc_lo -; GFX10-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, s7, v9, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v3, vcc_lo, v14, s6 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v16, s0, 0, v9, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s6, v14 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, s7, v9, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v17, 0, -1, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s6, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, -1, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s7, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v19, 0, -1, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s7, v15 -; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, -1, s0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v5, v[0:1] -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v19, v18, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v15 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v17, v20, v17, s0 -; GFX10-NEXT: v_sub_co_u32 v1, s0, v3, s6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v9, v4, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s6, v5, 0 +; GFX10-NEXT: v_mul_lo_u32 v7, s23, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v6 +; GFX10-NEXT: v_mul_lo_u32 v6, s22, v4 +; GFX10-NEXT: v_mul_hi_u32 v9, s22, v2 +; GFX10-NEXT: v_mul_hi_u32 v10, s23, v2 +; GFX10-NEXT: v_mul_lo_u32 v11, s23, v4 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s6, v3, v[1:2] +; GFX10-NEXT: v_mul_hi_u32 v12, s22, v4 +; GFX10-NEXT: v_mul_hi_u32 v4, s23, v4 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v7, v6 +; GFX10-NEXT: v_sub_co_u32 v6, s2, s12, v0 +; GFX10-NEXT: v_mov_b32_e32 v8, 0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v9, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s3, 0, 0, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s7, v5, v[1:2] +; GFX10-NEXT: v_add_co_u32 v2, s0, v11, v10 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s1, v2, v12, s0 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v9, vcc_lo, s13, v1, s2 +; GFX10-NEXT: v_sub_nc_u32_e32 v7, s13, v1 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s6, v6 +; GFX10-NEXT: v_add_co_ci_u32_e64 v17, s0, v2, v0, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v7, vcc_lo, s7, v7, s2 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v9 +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v11, vcc_lo, v6, s6 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v12, s2, 0, v7, vcc_lo +; GFX10-NEXT: v_add_co_u32 v13, s2, v5, 1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s2, 0, v3, s2 +; GFX10-NEXT: v_cmp_eq_u32_e64 s2, s7, v9 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v7, vcc_lo, s7, v7, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v1, s2 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s1, 0, 0, s1 +; GFX10-NEXT: v_cmp_le_u32_e64 s2, s7, v12 +; GFX10-NEXT: v_cmp_le_u32_e64 s1, s6, v11 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s0, 0, v1, s0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s14, v17, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, -1, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, -1, s1 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v12 +; GFX10-NEXT: v_add_nc_u32_e32 v4, v4, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v16, s0 +; GFX10-NEXT: v_add_co_u32 v16, s0, v13, 1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v18, s0, 0, v14, s0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s14, v4, v[1:2] +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 +; GFX10-NEXT: v_sub_co_u32 v2, s0, v11, s6 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v7, s0, 0, v7, s0 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v10 +; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v2, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s1, s15, v17, v[1:2] +; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v16, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v14, v14, v18, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v10, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v5, v13, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v5, v12, v7, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v7, vcc_lo, s22, v0 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v10, s1, s23, v1, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s23, v1 +; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, v5, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v14, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s15, v10 +; GFX10-NEXT: v_xor_b32_e32 v0, s20, v2 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, s15, v1, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s14, v7 +; GFX10-NEXT: v_xor_b32_e32 v2, s21, v3 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, -1, s0 +; GFX10-NEXT: v_xor_b32_e32 v5, s4, v5 +; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v12, vcc_lo, v7, s14 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v13, s0, 0, v9, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v0, s0, v0, s20 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v1, s0, s21, v2, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s15, v10 +; GFX10-NEXT: v_xor_b32_e32 v2, s4, v6 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, s15, v9, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s15, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, -1, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s14, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, s0 +; GFX10-NEXT: v_add_co_u32 v14, s0, v17, 1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v15, s0, 0, v4, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s15, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v11, s0 +; GFX10-NEXT: v_add_co_u32 v11, s0, v14, 1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v16, s0, 0, v15, s0 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 +; GFX10-NEXT: v_sub_co_u32 v6, s0, v12, s14 ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v9, s0, 0, v9, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, s3, v6, v[0:1] -; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v17 -; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v13, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v12, v7, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v7, v16, v9, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, s14, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v8, s0 -; GFX10-NEXT: v_sub_co_ci_u32_e64 v8, s1, s15, v0, vcc_lo -; GFX10-NEXT: v_sub_nc_u32_e32 v0, s15, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v14, v3, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, v7, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s3, v8 -; GFX10-NEXT: v_xor_b32_e32 v1, s18, v1 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v11, vcc_lo, s3, v0, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v2 -; GFX10-NEXT: v_xor_b32_e32 v4, s19, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, -1, s0 -; GFX10-NEXT: v_xor_b32_e32 v3, s4, v3 -; GFX10-NEXT: v_xor_b32_e32 v7, s4, v7 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v13, vcc_lo, v2, s2 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v14, s0, 0, v11, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v0, s0, v1, s18 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v1, s0, s19, v4, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s3, v8 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v11, vcc_lo, s3, v11, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v4, v9, v12, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s3, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, -1, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0 -; GFX10-NEXT: v_add_co_u32 v15, s0, v6, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v16, s0, 0, v5, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s3, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v12, s0 -; GFX10-NEXT: v_add_co_u32 v12, s0, v15, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v17, s0, 0, v16, s0 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 -; GFX10-NEXT: v_sub_co_u32 v9, s0, v13, s2 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v11, s0, 0, v11, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v12, v15, v12, vcc_lo -; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v4 -; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v17, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v4, v13, v9, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v9, v14, v11, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v12, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v11, v5, v15, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v4, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v9, s0 -; GFX10-NEXT: s_xor_b64 s[0:1], s[12:13], s[16:17] -; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v3, s4 -; GFX10-NEXT: v_xor_b32_e32 v3, s0, v6 -; GFX10-NEXT: v_xor_b32_e32 v6, s1, v11 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v5, vcc_lo, s4, v7, vcc_lo -; GFX10-NEXT: v_xor_b32_e32 v7, s12, v2 -; GFX10-NEXT: v_xor_b32_e32 v8, s12, v8 -; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v3, s0 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v6, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v6, vcc_lo, v7, s12 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v7, vcc_lo, s12, v8, vcc_lo -; GFX10-NEXT: global_store_dwordx4 v10, v[0:3], s[8:9] -; GFX10-NEXT: global_store_dwordx4 v10, v[4:7], s[10:11] +; GFX10-NEXT: v_cndmask_b32_e32 v11, v14, v11, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v3 +; GFX10-NEXT: v_cndmask_b32_e32 v14, v15, v16, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, v12, v6, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v6, v13, v9, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v9, v17, v11, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v11, v4, v14, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v6, s0 +; GFX10-NEXT: s_xor_b64 s[0:1], s[16:17], s[18:19] +; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v2, s4 +; GFX10-NEXT: v_xor_b32_e32 v2, s0, v9 +; GFX10-NEXT: v_xor_b32_e32 v7, s1, v11 +; GFX10-NEXT: v_xor_b32_e32 v9, s16, v3 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v5, vcc_lo, s4, v5, vcc_lo +; GFX10-NEXT: v_xor_b32_e32 v10, s16, v6 +; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v2, s0 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v7, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v6, vcc_lo, v9, s16 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v7, vcc_lo, s16, v10, vcc_lo +; GFX10-NEXT: global_store_dwordx4 v8, v[0:3], s[8:9] +; GFX10-NEXT: global_store_dwordx4 v8, v[4:7], s[10:11] ; GFX10-NEXT: s_endpgm %div = sdiv <2 x i64> %x, %y store <2 x i64> %div, ptr addrspace(1) %out0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index 83ebc84..8aa2238 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -24,135 +24,120 @@ define i64 @v_srem_i64(i64 %num, i64 %den) { ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: s_setpc_b64 s[30:31] ; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v1 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v3, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 -; CHECK-NEXT: v_xor_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v0 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v1 -; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v0 -; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v1, vcc -; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 -; CHECK-NEXT: v_trunc_f32_e32 v6, v3 -; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v6 -; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v11, v6 -; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v8, 0 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[3:4] -; CHECK-NEXT: v_mul_lo_u32 v3, v11, v2 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] -; CHECK-NEXT: v_mul_hi_u32 v7, v8, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v11, v2 -; CHECK-NEXT: v_mul_lo_u32 v12, v8, v6 -; CHECK-NEXT: v_mul_lo_u32 v13, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v8, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v12, v3 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v13, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v2 -; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v3, vcc -; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v8, 0 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[3:4] -; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v5 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v9 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v9, vcc -; CHECK-NEXT: v_xor_b32_e32 v5, v3, v9 -; CHECK-NEXT: v_mul_lo_u32 v3, v11, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, v8, v6 -; CHECK-NEXT: v_xor_b32_e32 v10, v4, v9 -; CHECK-NEXT: v_mul_hi_u32 v4, v8, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v11, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_mul_hi_u32 v7, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v3, v0, vcc +; CHECK-NEXT: v_xor_b32_e32 v3, v1, v0 +; CHECK-NEXT: v_xor_b32_e32 v6, v2, v0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v6 +; CHECK-NEXT: v_sub_i32_e32 v8, vcc, 0, v3 +; CHECK-NEXT: v_subb_u32_e32 v9, vcc, 0, v6, vcc +; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CHECK-NEXT: v_trunc_f32_e32 v2, v1 +; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v0 +; CHECK-NEXT: v_cvt_u32_f32_e32 v10, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2] +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2] +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v10, v0 +; CHECK-NEXT: v_mul_lo_u32 v12, v7, v1 +; CHECK-NEXT: v_mul_lo_u32 v13, v10, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v12 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v11, vcc +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; CHECK-NEXT: v_mul_hi_u32 v1, v10, v1 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; CHECK-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v11, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v0 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v1, vcc +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2] +; CHECK-NEXT: v_ashrrev_i32_e32 v8, 31, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2] +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v8 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v8, vcc +; CHECK-NEXT: v_xor_b32_e32 v5, v2, v8 +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v0 +; CHECK-NEXT: v_mul_lo_u32 v9, v7, v1 +; CHECK-NEXT: v_mul_hi_u32 v0, v10, v0 +; CHECK-NEXT: v_xor_b32_e32 v4, v4, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v11, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v1 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v1 +; CHECK-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_mul_hi_u32 v1, v10, v1 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v9, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v10, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0 +; CHECK-NEXT: v_mul_lo_u32 v7, v5, v1 +; CHECK-NEXT: v_mul_hi_u32 v9, v5, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v11, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, v10, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v7, v5, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v10, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, v10, v3 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v10, v3 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_mul_hi_u32 v6, v5, v3 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v2, v4 -; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v7, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4] -; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 -; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v7, v[3:4] -; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v10, v3, vcc -; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v10, v3 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v9, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v9, v5, v1 +; CHECK-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v0, v7, vcc +; CHECK-NEXT: v_mul_hi_u32 v9, v4, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v2, v[1:2] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v5, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v7, v[1:2] +; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v4, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v4, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v1 -; CHECK-NEXT: v_subb_u32_e32 v3, vcc, v3, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v2, v0 -; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v3, vcc -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v0 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v6 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v3 +; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v3 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v1 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v6, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v6 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v5, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v9 -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v9 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v9 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v8 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v8 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc ; CHECK-NEXT: ; implicit-def: $vgpr2 ; CHECK-NEXT: ; implicit-def: $vgpr4 ; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] @@ -229,18 +214,13 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v8, v3, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, vcc ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v8, vcc +; CHECK-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v0 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc @@ -252,21 +232,16 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 ; CHECK-NEXT: v_mul_lo_u32 v5, v3, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CHECK-NEXT: v_mul_hi_u32 v5, v3, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v6, v3, v1 +; CHECK-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v6, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc @@ -274,41 +249,36 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_lo_u32 v3, s10, v1 ; CHECK-NEXT: v_mul_hi_u32 v4, s10, v0 ; CHECK-NEXT: v_mul_hi_u32 v0, s11, v0 -; CHECK-NEXT: v_mul_hi_u32 v5, s11, v1 +; CHECK-NEXT: v_mov_b32_e32 v5, s11 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s11, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v3, s10, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v0, v2 -; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, s11, v1 +; CHECK-NEXT: v_mul_hi_u32 v4, s10, v1 +; CHECK-NEXT: v_addc_u32_e64 v3, s[0:1], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v4, vcc +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v0, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v4, s11, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v2, v[1:2] -; CHECK-NEXT: v_mov_b32_e32 v5, s11 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s10, v0 -; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2] -; CHECK-NEXT: v_mov_b32_e32 v3, s9 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] +; CHECK-NEXT: v_mov_b32_e32 v4, s9 ; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v5, v1, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[0:1], s11, v1 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc ; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2 -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1] ; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0 -; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] ; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v2 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v5, s[0:1] +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v2, v4, v5, s[0:1] ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc @@ -394,21 +364,16 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_hi_u32 v9, v15, v9 ; GISEL-NEXT: v_mul_lo_u32 v11, v12, v10 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v16, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; GISEL-NEXT: v_mul_hi_u32 v11, v12, v10 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v16, vcc +; GISEL-NEXT: v_mul_lo_u32 v4, v15, v10 +; GISEL-NEXT: v_mul_hi_u32 v16, v12, v10 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; GISEL-NEXT: v_mul_hi_u32 v10, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v16, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v4 ; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v9, vcc @@ -424,23 +389,18 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_lo_u32 v13, v12, v10 ; GISEL-NEXT: v_xor_b32_e32 v14, v1, v4 ; GISEL-NEXT: v_mul_hi_u32 v1, v12, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v15, v9 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v15, v10 +; GISEL-NEXT: v_mul_hi_u32 v1, v15, v9 ; GISEL-NEXT: v_mul_hi_u32 v13, v12, v10 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_mul_hi_u32 v10, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v9, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v15, v1, vcc @@ -448,159 +408,139 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_lo_u32 v10, v11, v1 ; GISEL-NEXT: v_mul_hi_u32 v12, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0 -; GISEL-NEXT: v_mul_hi_u32 v13, v14, v1 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v14, v1 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_mul_hi_u32 v10, v11, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v0, v9 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v12, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v14, v1 +; GISEL-NEXT: v_mul_hi_u32 v12, v11, v1 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v0, v10, vcc +; GISEL-NEXT: v_mul_hi_u32 v10, v14, v1 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v12, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 ; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v5, v9, v[1:2] +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v11, v0 ; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v8, v12, v[9:10] -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v11, v0 -; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v14, v9, vcc +; GISEL-NEXT: v_subb_u32_e64 v12, s[4:5], v14, v9, vcc ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v14, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v8 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v8 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v12, v1, v9, s[4:5] -; GISEL-NEXT: v_subb_u32_e32 v9, vcc, v0, v8, vcc +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v8 +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, v0, v8, vcc ; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v13, v1, v9, s[4:5] ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v0 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v0, vcc ; GISEL-NEXT: v_xor_b32_e32 v6, v1, v0 ; GISEL-NEXT: v_xor_b32_e32 v7, v7, v0 ; GISEL-NEXT: v_cvt_f32_u32_e32 v0, v6 ; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v7 -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v5 -; GISEL-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v9, vcc +; GISEL-NEXT: v_sub_i32_e32 v15, vcc, v11, v5 +; GISEL-NEXT: v_subbrev_u32_e64 v16, s[4:5], 0, v14, vcc ; GISEL-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v13, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v14, v8 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v16, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v15, v5 ; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v15, v15, v1, s[4:5] ; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GISEL-NEXT: v_trunc_f32_e32 v16, v1 -; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v16 -; GISEL-NEXT: v_cvt_u32_f32_e32 v17, v0 -; GISEL-NEXT: v_sub_i32_e64 v18, s[4:5], 0, v6 -; GISEL-NEXT: v_subb_u32_e64 v19, s[4:5], 0, v7, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v18, v17, 0 -; GISEL-NEXT: v_cvt_u32_f32_e32 v16, v16 -; GISEL-NEXT: v_subb_u32_e32 v20, vcc, v9, v8, vcc -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v18, v16, v[1:2] -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v13, v5 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v19, v17, v[8:9] -; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v20, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v16, v0 -; GISEL-NEXT: v_mul_lo_u32 v13, v17, v8 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v14, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v14, v17, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v16, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v16, v8 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_mul_hi_u32 v13, v17, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_mul_hi_u32 v8, v16, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v17, v0 -; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v16, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v18, v13, 0 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 -; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v9, vcc -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v18, v14, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v10, v4 -; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v19, v13, v[8:9] +; GISEL-NEXT: v_trunc_f32_e32 v17, v1 +; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v17 +; GISEL-NEXT: v_cvt_u32_f32_e32 v18, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v19, s[4:5], 0, v6 +; GISEL-NEXT: v_subb_u32_e64 v20, s[4:5], 0, v7, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v18, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v17, v17 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v16, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v21, v9, v10, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v19, v17, v[1:2] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v14, v8, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v20, v18, v[9:10] +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v15, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v17, v0 +; GISEL-NEXT: v_mul_lo_u32 v10, v18, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, v18, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v17, v0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v1, v10 +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v14, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v1, v17, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, v18, v8 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v1, v0 +; GISEL-NEXT: v_mul_hi_u32 v8, v17, v8 +; GISEL-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v14, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v10, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v8, v1 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v18, v0 +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v17, v1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v10, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v5, v15, v5, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v15, v16, v9, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v19, v14, v[1:2] +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc -; GISEL-NEXT: v_xor_b32_e32 v11, v2, v10 +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v20, v10, v[8:9] +; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v15, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc +; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11 ; GISEL-NEXT: v_mul_lo_u32 v2, v14, v0 -; GISEL-NEXT: v_mul_lo_u32 v9, v13, v8 -; GISEL-NEXT: v_xor_b32_e32 v12, v3, v10 -; GISEL-NEXT: v_mul_hi_u32 v3, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v9, v10, v8 +; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11 +; GISEL-NEXT: v_mul_hi_u32 v3, v10, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v14, v8 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2 -; GISEL-NEXT: v_mul_hi_u32 v9, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v9 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v14, v8 +; GISEL-NEXT: v_mul_hi_u32 v9, v10, v8 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GISEL-NEXT: v_mul_hi_u32 v8, v14, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 ; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v14, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0 -; GISEL-NEXT: v_mul_lo_u32 v8, v11, v2 -; GISEL-NEXT: v_mul_hi_u32 v9, v11, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_xor_b32_e32 v5, v5, v4 +; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v8, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v9, v12, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v8, v3 -; GISEL-NEXT: v_mul_hi_u32 v8, v11, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v0, v3 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v13, v2 ; GISEL-NEXT: v_mul_hi_u32 v9, v12, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v13, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v8, v13, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v10, 0 +; GISEL-NEXT: v_xor_b32_e32 v5, v5, v4 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v6, v8, v[0:1] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v5, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v13, v[8:9] -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 -; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v5, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v10, v[8:9] +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 @@ -611,24 +551,24 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v2, v6 ; GISEL-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v7 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v8, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v12, s[4:5] ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 ; GISEL-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64: @@ -646,131 +586,116 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB2_2 ; CGP-NEXT: ; %bb.1: -; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v1 -; CGP-NEXT: v_addc_u32_e32 v2, vcc, v5, v1, vcc -; CGP-NEXT: v_xor_b32_e32 v0, v0, v1 -; CGP-NEXT: v_xor_b32_e32 v1, v2, v1 -; CGP-NEXT: v_cvt_f32_u32_e32 v2, v0 -; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1 -; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v0 -; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v1, vcc -; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 -; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 -; CGP-NEXT: v_trunc_f32_e32 v4, v3 -; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v2 -; CGP-NEXT: v_cvt_u32_f32_e32 v14, v4 -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v5, 0 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v14, v[3:4] -; CGP-NEXT: v_mul_hi_u32 v15, v5, v2 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[3:4] -; CGP-NEXT: v_mul_lo_u32 v4, v14, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v14, v2 -; CGP-NEXT: v_mul_lo_u32 v16, v5, v3 -; CGP-NEXT: v_mul_lo_u32 v17, v14, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v16 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v15 -; CGP-NEXT: v_mul_hi_u32 v15, v5, v3 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v16, v4 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v17, v2 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; CGP-NEXT: v_mul_hi_u32 v3, v14, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v2 -; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v3, vcc -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v5, 0 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v14, v[3:4] +; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v5 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v5, v0, vcc +; CGP-NEXT: v_xor_b32_e32 v3, v1, v0 +; CGP-NEXT: v_xor_b32_e32 v4, v2, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v3 +; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v4, vcc +; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CGP-NEXT: v_trunc_f32_e32 v2, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v2 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v5, 0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v14, v[1:2] +; CGP-NEXT: v_mul_hi_u32 v15, v5, v0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v5, v[1:2] +; CGP-NEXT: v_mul_lo_u32 v2, v14, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v14, v0 +; CGP-NEXT: v_mul_lo_u32 v16, v5, v1 +; CGP-NEXT: v_mul_lo_u32 v17, v14, v1 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v16 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v15, vcc +; CGP-NEXT: v_mul_hi_u32 v15, v5, v1 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v17, v0 +; CGP-NEXT: v_mul_hi_u32 v1, v14, v1 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v15, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v15, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v0 +; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v5, 0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v14, v[1:2] ; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v11 -; CGP-NEXT: v_mul_hi_u32 v15, v5, v2 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[3:4] -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v12 +; CGP-NEXT: v_mul_hi_u32 v15, v5, v0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v5, v[1:2] +; CGP-NEXT: v_add_i32_e32 v2, vcc, v10, v12 ; CGP-NEXT: v_addc_u32_e32 v10, vcc, v11, v12, vcc -; CGP-NEXT: v_xor_b32_e32 v11, v4, v12 -; CGP-NEXT: v_mul_lo_u32 v4, v14, v2 -; CGP-NEXT: v_mul_lo_u32 v13, v5, v3 -; CGP-NEXT: v_mul_hi_u32 v2, v14, v2 +; CGP-NEXT: v_xor_b32_e32 v11, v2, v12 +; CGP-NEXT: v_mul_lo_u32 v2, v14, v0 +; CGP-NEXT: v_mul_lo_u32 v13, v5, v1 +; CGP-NEXT: v_mul_hi_u32 v0, v14, v0 ; CGP-NEXT: v_xor_b32_e32 v10, v10, v12 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v15 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v15, v14, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_mul_hi_u32 v13, v5, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v15, v2 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; CGP-NEXT: v_mul_hi_u32 v3, v14, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v14, v3, vcc -; CGP-NEXT: v_mul_lo_u32 v4, v10, v2 -; CGP-NEXT: v_mul_lo_u32 v5, v11, v3 -; CGP-NEXT: v_mul_hi_u32 v13, v11, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v10, v2 -; CGP-NEXT: v_mul_hi_u32 v14, v10, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v10, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_mul_hi_u32 v5, v11, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v13, v2 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v15, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v14, v1 +; CGP-NEXT: v_mul_hi_u32 v15, v5, v1 +; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CGP-NEXT: v_mul_hi_u32 v1, v14, v1 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v13, vcc +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v14, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v10, v0 +; CGP-NEXT: v_mul_lo_u32 v5, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v13, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v10, v0 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v13, v5 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v2, v4 -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v13, 0 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v14, v4 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4] -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v13, v[3:4] -; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v10, v3, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v10, v3 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v13, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v10, v1 +; CGP-NEXT: v_mul_hi_u32 v13, v11, v1 +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v13, vcc +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v0, v5, vcc +; CGP-NEXT: v_mul_hi_u32 v13, v10, v1 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v5, 0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v13, v2 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v2, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v5, v[1:2] +; CGP-NEXT: v_subb_u32_e64 v2, s[4:5], v10, v1, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v10, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v1 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v1, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc ; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v10, vcc, v2, v0 -; CGP-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v3, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v1 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v0, v3 +; CGP-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v1, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v4 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v3 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v1 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v10, v0 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v4 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v10, v3 ; CGP-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[4:5] ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; CGP-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v0, v12 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v12 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 @@ -815,131 +740,116 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: s_or_b64 exec, exec, s[4:5] ; CGP-NEXT: s_setpc_b64 s[30:31] ; CGP-NEXT: .LBB2_7: -; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v7 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v3 -; CGP-NEXT: v_addc_u32_e32 v4, vcc, v7, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v3 -; CGP-NEXT: v_xor_b32_e32 v3, v4, v3 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, v3 -; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v2 -; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v3, vcc -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v6, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v12, v6 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v7, 0 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[5:6] -; CGP-NEXT: v_mul_hi_u32 v13, v7, v4 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v7, v[5:6] -; CGP-NEXT: v_mul_lo_u32 v6, v12, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v12, v4 -; CGP-NEXT: v_mul_lo_u32 v14, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v15, v12, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v5 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v14, v6 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_mul_hi_u32 v5, v12, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v13, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v4 -; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v5, vcc -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v7, 0 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[5:6] +; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v7 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v2 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v7, v2, vcc +; CGP-NEXT: v_xor_b32_e32 v5, v3, v2 +; CGP-NEXT: v_xor_b32_e32 v6, v4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v5 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v5 +; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v6, vcc +; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CGP-NEXT: v_trunc_f32_e32 v4, v3 +; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v12, v4 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v7, 0 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4] +; CGP-NEXT: v_mul_hi_u32 v13, v7, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v7, v[3:4] +; CGP-NEXT: v_mul_lo_u32 v4, v12, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v3 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v3 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc +; CGP-NEXT: v_mul_hi_u32 v13, v7, v3 +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v15, v2 +; CGP-NEXT: v_mul_hi_u32 v3, v12, v3 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], v2, v13, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v13, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v2 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v3, vcc +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v7, 0 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4] ; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v4 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v7, v[5:6] -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v10 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v7, v[3:4] +; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v10, vcc -; CGP-NEXT: v_xor_b32_e32 v9, v6, v10 -; CGP-NEXT: v_mul_lo_u32 v6, v12, v4 -; CGP-NEXT: v_mul_lo_u32 v11, v7, v5 -; CGP-NEXT: v_mul_hi_u32 v4, v12, v4 +; CGP-NEXT: v_xor_b32_e32 v9, v4, v10 +; CGP-NEXT: v_mul_lo_u32 v4, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v11, v7, v3 +; CGP-NEXT: v_mul_hi_u32 v2, v12, v2 ; CGP-NEXT: v_xor_b32_e32 v8, v8, v10 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v12, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6 -; CGP-NEXT: v_mul_hi_u32 v11, v7, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_mul_hi_u32 v5, v12, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, v12, v5, vcc -; CGP-NEXT: v_mul_lo_u32 v6, v8, v4 -; CGP-NEXT: v_mul_lo_u32 v7, v9, v5 -; CGP-NEXT: v_mul_hi_u32 v11, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 -; CGP-NEXT: v_mul_hi_u32 v12, v8, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v8, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_mul_hi_u32 v7, v9, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v12, v3 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v3 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; CGP-NEXT: v_mul_hi_u32 v3, v12, v3 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], v2, v13, vcc +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v12, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v8, v2 +; CGP-NEXT: v_mul_lo_u32 v7, v9, v3 +; CGP-NEXT: v_mul_hi_u32 v11, v9, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v8, v2 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v4, v6 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v11, 0 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[5:6] -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v11, v[5:6] -; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v8, v5, vcc -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v8, v5 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v11, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v8, v3 +; CGP-NEXT: v_mul_hi_u32 v11, v9, v3 +; CGP-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], v2, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v2, v7, vcc +; CGP-NEXT: v_mul_hi_u32 v11, v8, v3 +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v7, 0 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v4, v[3:4] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4] +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v8, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v8, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v6 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v6, v3 -; CGP-NEXT: v_subb_u32_e32 v5, vcc, v5, v3, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v6 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v4, v2 -; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v5, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v3 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v5 +; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v3 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v5, v3, vcc -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v6 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5] ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v2, v10 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 @@ -999,18 +909,13 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3 ; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v10, v2 +; CHECK-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v4, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v2, vcc, v2, v4, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2 ; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc @@ -1025,23 +930,18 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 ; CHECK-NEXT: v_xor_b32_e32 v9, v1, v6 ; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v1, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v0, v7, v3 +; CHECK-NEXT: v_mul_hi_u32 v1, v7, v2 ; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v8, vcc +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc @@ -1051,23 +951,18 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0 ; CHECK-NEXT: v_mov_b32_e32 v5, 0x1000 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v9, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, v9, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v7, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v9, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, v4, v1 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v9, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 ; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2] ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v0 ; CHECK-NEXT: v_subb_u32_e64 v2, vcc, v9, v1, s[4:5] @@ -1110,6 +1005,8 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: s_sub_u32 s8, 0, 0x1000 +; GISEL-NEXT: s_subb_u32 s9, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; GISEL-NEXT: v_trunc_f32_e32 v7, v5 @@ -1127,18 +1024,13 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v4 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0 @@ -1154,23 +1046,18 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_lo_u32 v12, v11, v9 ; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4 ; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v1, v5, v8 ; GISEL-NEXT: v_mul_hi_u32 v12, v11, v9 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc @@ -1180,104 +1067,86 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_mov_b32_e32 v5, 0x1000 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_hi_u32 v9, v10, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8 -; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v11, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v13, v1 +; GISEL-NEXT: v_mul_hi_u32 v11, v10, v1 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v0, v9, vcc +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v1 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2] ; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v10, v0 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9] -; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000 -; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v13, v8, vcc ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v13, v8 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 -; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v1, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v0, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0 +; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v0, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s8, v6, 0 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v8, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s8, v7, v[1:2] ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s9, v6, v[8:9] ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14 -; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2] -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v13, v5 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9] -; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v14, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0 -; GISEL-NEXT: v_mul_lo_u32 v13, v6, v8 -; GISEL-NEXT: v_mul_hi_u32 v15, v6, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v15, vcc +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v13, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v14, vcc +; GISEL-NEXT: v_mul_lo_u32 v16, v7, v0 +; GISEL-NEXT: v_mul_lo_u32 v17, v6, v8 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_mul_hi_u32 v1, v6, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v9, vcc +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v16, v17 +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v13, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v1, v7, v8 ; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v6, v8 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v1, v0 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v6, v0 -; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v13, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v8, v1 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v0 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], v7, v1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s8, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s8, v13, v[1:2] ; GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v14, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc -; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9 +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s9, v8, v[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v14, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v11, v2, v10 ; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0 ; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6 -; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9 +; GISEL-NEXT: v_xor_b32_e32 v12, v3, v10 ; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v13, v6 ; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 ; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc @@ -1285,30 +1154,26 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2 ; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4 +; GISEL-NEXT: v_xor_b32_e32 v8, v9, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v0 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v11, v2 +; GISEL-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v0, v6, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v9, 0 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v8, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v9, v[6:7] ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 ; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 @@ -1323,18 +1188,18 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v7, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 -; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc +; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v3, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5] ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64_pow2k_denom: @@ -1347,218 +1212,188 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v7, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v9, v7 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0 -; CGP-NEXT: v_mov_b32_e32 v7, v5 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8] -; CGP-NEXT: v_mul_hi_u32 v12, v9, v4 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11] -; CGP-NEXT: v_mul_lo_u32 v10, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v4 -; CGP-NEXT: v_mul_lo_u32 v4, v8, v13 -; CGP-NEXT: v_mul_lo_u32 v7, v9, v13 -; CGP-NEXT: v_mul_hi_u32 v14, v8, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v9, v13 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_trunc_f32_e32 v5, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v5 +; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v7, 0 +; CGP-NEXT: v_mov_b32_e32 v4, v10 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, v[4:5] +; CGP-NEXT: v_mul_lo_u32 v14, v8, v9 +; CGP-NEXT: v_mul_hi_u32 v15, v7, v9 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], -1, v7, v[4:5] +; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 +; CGP-NEXT: v_mul_lo_u32 v5, v7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, v8, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v9 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v11, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v13, v7 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v8, v4 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 -; CGP-NEXT: v_addc_u32_e32 v17, vcc, v9, v7, vcc -; CGP-NEXT: v_mov_b32_e32 v4, v14 -; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] -; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 -; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15] -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc -; CGP-NEXT: v_xor_b32_e32 v15, v0, v7 -; CGP-NEXT: v_mul_lo_u32 v0, v17, v13 -; CGP-NEXT: v_mul_lo_u32 v4, v16, v14 -; CGP-NEXT: v_xor_b32_e32 v18, v1, v7 -; CGP-NEXT: v_mul_hi_u32 v1, v16, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v17, v13 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v6, v5, 0 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, v8, v4, vcc +; CGP-NEXT: v_mov_b32_e32 v4, v12 +; CGP-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v6, v16, v[4:5] +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mad_u64_u32 v[12:13], s[4:5], -1, v5, v[12:13] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v13, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v0, v16, v11 +; CGP-NEXT: v_mul_lo_u32 v17, v5, v12 +; CGP-NEXT: v_xor_b32_e32 v18, v1, v4 +; CGP-NEXT: v_mul_hi_u32 v1, v5, v11 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v17 +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v16, v12 +; CGP-NEXT: v_mul_hi_u32 v1, v16, v11 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v12 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v1, v17, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; CGP-NEXT: v_mul_hi_u32 v4, v16, v14 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_mul_hi_u32 v13, v17, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v18, v0 -; CGP-NEXT: v_mul_lo_u32 v14, v15, v1 -; CGP-NEXT: v_mul_hi_u32 v16, v15, v0 +; CGP-NEXT: v_mul_hi_u32 v12, v16, v12 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v17, vcc +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v16, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v18, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v13, v1 +; CGP-NEXT: v_mul_hi_u32 v16, v13, v0 ; CGP-NEXT: v_mul_hi_u32 v0, v18, v0 -; CGP-NEXT: v_mov_b32_e32 v4, 0x1000 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v18, v1 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_mul_hi_u32 v14, v15, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; CGP-NEXT: v_mul_hi_u32 v16, v18, v1 -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2] -; CGP-NEXT: v_sub_i32_e32 v14, vcc, v15, v0 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v18, v13 -; CGP-NEXT: v_subb_u32_e64 v15, s[4:5], v18, v13, vcc +; CGP-NEXT: v_mov_b32_e32 v5, 0x1000 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, v11, v16, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v18, v1 +; CGP-NEXT: v_mul_hi_u32 v16, v13, v1 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v16, vcc +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v12, vcc +; CGP-NEXT: v_mul_hi_u32 v12, v18, v1 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v5, v11, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v12, vcc, v13, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v18, v11 +; CGP-NEXT: v_subb_u32_e64 v13, s[4:5], v18, v11, vcc ; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v4 -; CGP-NEXT: v_sub_i32_e32 v16, vcc, v14, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v5 +; CGP-NEXT: v_sub_i32_e32 v16, vcc, v12, v5 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13 ; CGP-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v0, vcc -; CGP-NEXT: v_mov_b32_e32 v0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5] -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1] -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4 +; CGP-NEXT: v_mov_b32_e32 v0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v11, -1, v1, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v16, v5 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, -1, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1] ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 -; CGP-NEXT: v_cndmask_b32_e32 v5, -1, v18, vcc -; CGP-NEXT: v_mul_lo_u32 v19, v8, v0 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v16, v4 +; CGP-NEXT: v_cndmask_b32_e32 v10, -1, v18, vcc +; CGP-NEXT: v_mul_lo_u32 v19, v7, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v16, v5 ; CGP-NEXT: v_subbrev_u32_e32 v18, vcc, 0, v17, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v5, v16, v1, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v10, v16, v1, vcc ; CGP-NEXT: v_cndmask_b32_e32 v16, v17, v18, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v19 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v9, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_mul_hi_u32 v10, v8, v0 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v1 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0 -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; CGP-NEXT: v_cndmask_b32_e32 v5, v14, v5, vcc -; CGP-NEXT: v_xor_b32_e32 v11, v5, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2] -; CGP-NEXT: v_cndmask_b32_e32 v10, v15, v16, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v10, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6] +; CGP-NEXT: v_add_i32_e32 v1, vcc, v14, v19 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v15, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v8, v0 +; CGP-NEXT: v_mul_hi_u32 v15, v7, v0 +; CGP-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v14, vcc +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v7, v1 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, 0 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, v8, v[1:2] +; CGP-NEXT: v_xor_b32_e32 v1, v10, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v9, v[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v11, v13, v16, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc -; CGP-NEXT: v_xor_b32_e32 v12, v2, v10 -; CGP-NEXT: v_mul_lo_u32 v2, v9, v0 -; CGP-NEXT: v_mul_lo_u32 v6, v8, v5 +; CGP-NEXT: v_xor_b32_e32 v7, v2, v10 +; CGP-NEXT: v_mul_lo_u32 v2, v8, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v9, v6 ; CGP-NEXT: v_xor_b32_e32 v13, v3, v10 -; CGP-NEXT: v_mul_hi_u32 v3, v8, v0 -; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v3, v9, v5 +; CGP-NEXT: v_mul_hi_u32 v3, v9, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v8, v6 +; CGP-NEXT: v_mul_hi_u32 v12, v9, v6 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CGP-NEXT: v_mul_hi_u32 v6, v8, v6 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CGP-NEXT: v_mul_hi_u32 v6, v8, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_xor_b32_e32 v8, v11, v4 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CGP-NEXT: v_mul_hi_u32 v5, v9, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v0 -; CGP-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc -; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 -; CGP-NEXT: v_mul_lo_u32 v6, v12, v2 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v7 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v7, vcc -; CGP-NEXT: v_mul_hi_u32 v7, v12, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v13, v2 -; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_mul_hi_u32 v6, v12, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CGP-NEXT: v_mul_hi_u32 v7, v13, v2 -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4] -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 -; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4 -; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v2 +; CGP-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v6, vcc +; CGP-NEXT: v_mul_hi_u32 v9, v13, v2 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v3, vcc +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v0, 0 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v6 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v4, v[3:4] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v5 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v5 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_cndmask_b32_e32 v8, -1, v8, vcc -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v7, v4 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 -; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v5, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 +; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5] ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v2, v10 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 @@ -1594,18 +1429,13 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3 ; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v10, v2 +; CHECK-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v4, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v2, vcc, v2, v4, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2 ; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc @@ -1620,23 +1450,18 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 ; CHECK-NEXT: v_xor_b32_e32 v9, v1, v6 ; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v1, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v0, v7, v3 +; CHECK-NEXT: v_mul_hi_u32 v1, v7, v2 ; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v8, vcc +; CHECK-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc @@ -1646,23 +1471,18 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0 ; CHECK-NEXT: v_mov_b32_e32 v5, 0x12d8fb ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v9, v1 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, v9, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v7, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v9, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, v4, v1 +; CHECK-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v9, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 ; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2] ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v0 ; CHECK-NEXT: v_subb_u32_e64 v2, vcc, v9, v1, s[4:5] @@ -1705,6 +1525,8 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: s_sub_u32 s8, 0, 0x12d8fb +; GISEL-NEXT: s_subb_u32 s9, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; GISEL-NEXT: v_trunc_f32_e32 v7, v5 @@ -1722,18 +1544,13 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v4 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0 @@ -1749,23 +1566,18 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_lo_u32 v12, v11, v9 ; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4 ; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v1, v5, v8 ; GISEL-NEXT: v_mul_hi_u32 v12, v11, v9 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc @@ -1775,104 +1587,86 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_mov_b32_e32 v5, 0x12d8fb ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_hi_u32 v9, v10, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8 -; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v11, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v13, v1 +; GISEL-NEXT: v_mul_hi_u32 v11, v10, v1 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v0, v9, vcc +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v1 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2] ; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v10, v0 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9] -; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb -; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v13, v8, vcc ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v13, v8 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 -; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v1, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v0, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0 +; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v0, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s8, v6, 0 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v8, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s8, v7, v[1:2] ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s9, v6, v[8:9] ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14 -; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2] -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v13, v5 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9] -; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v14, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0 -; GISEL-NEXT: v_mul_lo_u32 v13, v6, v8 -; GISEL-NEXT: v_mul_hi_u32 v15, v6, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v15, vcc +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v13, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v14, vcc +; GISEL-NEXT: v_mul_lo_u32 v16, v7, v0 +; GISEL-NEXT: v_mul_lo_u32 v17, v6, v8 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_mul_hi_u32 v1, v6, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v9, vcc +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v16, v17 +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v13, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v1, v7, v8 ; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v6, v8 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v1, v0 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v6, v0 -; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v13, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v8, v1 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v0 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], v7, v1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s8, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s8, v13, v[1:2] ; GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v14, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc -; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9 +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s9, v8, v[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v14, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v11, v2, v10 ; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0 ; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6 -; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9 +; GISEL-NEXT: v_xor_b32_e32 v12, v3, v10 ; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v13, v6 ; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 ; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc @@ -1880,30 +1674,26 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2 ; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4 +; GISEL-NEXT: v_xor_b32_e32 v8, v9, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v0 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v11, v2 +; GISEL-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v0, v6, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v9, 0 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v8, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v9, v[6:7] ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 ; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 @@ -1918,18 +1708,18 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v7, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 -; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc +; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v3, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5] ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64_oddk_denom: @@ -1942,218 +1732,188 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v7, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v9, v7 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0 -; CGP-NEXT: v_mov_b32_e32 v7, v5 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8] -; CGP-NEXT: v_mul_hi_u32 v12, v9, v4 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11] -; CGP-NEXT: v_mul_lo_u32 v10, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v4 -; CGP-NEXT: v_mul_lo_u32 v4, v8, v13 -; CGP-NEXT: v_mul_lo_u32 v7, v9, v13 -; CGP-NEXT: v_mul_hi_u32 v14, v8, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v9, v13 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_trunc_f32_e32 v5, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v5 +; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v7, 0 +; CGP-NEXT: v_mov_b32_e32 v4, v10 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, v[4:5] +; CGP-NEXT: v_mul_lo_u32 v14, v8, v9 +; CGP-NEXT: v_mul_hi_u32 v15, v7, v9 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], -1, v7, v[4:5] +; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 +; CGP-NEXT: v_mul_lo_u32 v5, v7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, v8, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v9 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v11, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v13, v7 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v8, v4 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 -; CGP-NEXT: v_addc_u32_e32 v17, vcc, v9, v7, vcc -; CGP-NEXT: v_mov_b32_e32 v4, v14 -; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] -; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 -; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15] -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc -; CGP-NEXT: v_xor_b32_e32 v15, v0, v7 -; CGP-NEXT: v_mul_lo_u32 v0, v17, v13 -; CGP-NEXT: v_mul_lo_u32 v4, v16, v14 -; CGP-NEXT: v_xor_b32_e32 v18, v1, v7 -; CGP-NEXT: v_mul_hi_u32 v1, v16, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v17, v13 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v6, v5, 0 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, v8, v4, vcc +; CGP-NEXT: v_mov_b32_e32 v4, v12 +; CGP-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v6, v16, v[4:5] +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mad_u64_u32 v[12:13], s[4:5], -1, v5, v[12:13] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v13, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v0, v16, v11 +; CGP-NEXT: v_mul_lo_u32 v17, v5, v12 +; CGP-NEXT: v_xor_b32_e32 v18, v1, v4 +; CGP-NEXT: v_mul_hi_u32 v1, v5, v11 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v17 +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v16, v12 +; CGP-NEXT: v_mul_hi_u32 v1, v16, v11 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v12 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v1, v17, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; CGP-NEXT: v_mul_hi_u32 v4, v16, v14 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_mul_hi_u32 v13, v17, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v18, v0 -; CGP-NEXT: v_mul_lo_u32 v14, v15, v1 -; CGP-NEXT: v_mul_hi_u32 v16, v15, v0 +; CGP-NEXT: v_mul_hi_u32 v12, v16, v12 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v17, vcc +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v16, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v18, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v13, v1 +; CGP-NEXT: v_mul_hi_u32 v16, v13, v0 ; CGP-NEXT: v_mul_hi_u32 v0, v18, v0 -; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v18, v1 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_mul_hi_u32 v14, v15, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; CGP-NEXT: v_mul_hi_u32 v16, v18, v1 -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2] -; CGP-NEXT: v_sub_i32_e32 v14, vcc, v15, v0 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v18, v13 -; CGP-NEXT: v_subb_u32_e64 v15, s[4:5], v18, v13, vcc +; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, v11, v16, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v18, v1 +; CGP-NEXT: v_mul_hi_u32 v16, v13, v1 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v16, vcc +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v12, vcc +; CGP-NEXT: v_mul_hi_u32 v12, v18, v1 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v5, v11, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v12, vcc, v13, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v18, v11 +; CGP-NEXT: v_subb_u32_e64 v13, s[4:5], v18, v11, vcc ; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v4 -; CGP-NEXT: v_sub_i32_e32 v16, vcc, v14, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v5 +; CGP-NEXT: v_sub_i32_e32 v16, vcc, v12, v5 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13 ; CGP-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v0, vcc -; CGP-NEXT: v_mov_b32_e32 v0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5] -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1] -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4 +; CGP-NEXT: v_mov_b32_e32 v0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v11, -1, v1, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v16, v5 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, -1, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1] ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 -; CGP-NEXT: v_cndmask_b32_e32 v5, -1, v18, vcc -; CGP-NEXT: v_mul_lo_u32 v19, v8, v0 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v16, v4 +; CGP-NEXT: v_cndmask_b32_e32 v10, -1, v18, vcc +; CGP-NEXT: v_mul_lo_u32 v19, v7, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v16, v5 ; CGP-NEXT: v_subbrev_u32_e32 v18, vcc, 0, v17, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v5, v16, v1, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v10, v16, v1, vcc ; CGP-NEXT: v_cndmask_b32_e32 v16, v17, v18, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v19 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v9, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_mul_hi_u32 v10, v8, v0 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v1 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0 -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; CGP-NEXT: v_cndmask_b32_e32 v5, v14, v5, vcc -; CGP-NEXT: v_xor_b32_e32 v11, v5, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2] -; CGP-NEXT: v_cndmask_b32_e32 v10, v15, v16, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v10, v7 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6] +; CGP-NEXT: v_add_i32_e32 v1, vcc, v14, v19 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v15, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v8, v0 +; CGP-NEXT: v_mul_hi_u32 v15, v7, v0 +; CGP-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v15, vcc +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v14, vcc +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v7, v1 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, 0 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, v8, v[1:2] +; CGP-NEXT: v_xor_b32_e32 v1, v10, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v9, v[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v11, v13, v16, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc -; CGP-NEXT: v_xor_b32_e32 v12, v2, v10 -; CGP-NEXT: v_mul_lo_u32 v2, v9, v0 -; CGP-NEXT: v_mul_lo_u32 v6, v8, v5 +; CGP-NEXT: v_xor_b32_e32 v7, v2, v10 +; CGP-NEXT: v_mul_lo_u32 v2, v8, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v9, v6 ; CGP-NEXT: v_xor_b32_e32 v13, v3, v10 -; CGP-NEXT: v_mul_hi_u32 v3, v8, v0 -; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v3, v9, v5 +; CGP-NEXT: v_mul_hi_u32 v3, v9, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v8, v6 +; CGP-NEXT: v_mul_hi_u32 v12, v9, v6 +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CGP-NEXT: v_mul_hi_u32 v6, v8, v6 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CGP-NEXT: v_mul_hi_u32 v6, v8, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_xor_b32_e32 v8, v11, v4 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CGP-NEXT: v_mul_hi_u32 v5, v9, v5 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v0 -; CGP-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc -; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 -; CGP-NEXT: v_mul_lo_u32 v6, v12, v2 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v7 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v7, vcc -; CGP-NEXT: v_mul_hi_u32 v7, v12, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v13, v2 -; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_mul_hi_u32 v6, v12, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CGP-NEXT: v_mul_hi_u32 v7, v13, v2 -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4] -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 -; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4 -; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v2 +; CGP-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v6, vcc +; CGP-NEXT: v_mul_hi_u32 v9, v13, v2 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v3, vcc +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v0, 0 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v6 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v4, v[3:4] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v5 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v5 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_cndmask_b32_e32 v8, -1, v8, vcc -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v7, v4 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 -; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v5, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 +; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5] ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v2, v10 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 @@ -2186,137 +1946,120 @@ define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: s_setpc_b64 s[30:31] ; CHECK-NEXT: .LBB7_3: -; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v1 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v6, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 -; CHECK-NEXT: v_xor_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v0 -; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v1 -; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v0 -; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v1, vcc -; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v5 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2 -; CHECK-NEXT: v_trunc_f32_e32 v7, v5 -; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v7 -; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v11, v7 -; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0 -; CHECK-NEXT: v_mov_b32_e32 v2, v6 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[2:3] -; CHECK-NEXT: v_mul_lo_u32 v2, v11, v5 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] -; CHECK-NEXT: v_mul_hi_u32 v7, v8, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5 -; CHECK-NEXT: v_mul_lo_u32 v12, v8, v6 -; CHECK-NEXT: v_mul_lo_u32 v13, v11, v6 +; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v6 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v5, v0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v6, v0, vcc +; CHECK-NEXT: v_xor_b32_e32 v5, v1, v0 +; CHECK-NEXT: v_xor_b32_e32 v6, v2, v0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v6 +; CHECK-NEXT: v_sub_i32_e32 v8, vcc, 0, v5 +; CHECK-NEXT: v_subb_u32_e32 v9, vcc, 0, v6, vcc +; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CHECK-NEXT: v_trunc_f32_e32 v2, v1 +; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v0 +; CHECK-NEXT: v_cvt_u32_f32_e32 v10, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2] +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2] +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v10, v0 +; CHECK-NEXT: v_mul_lo_u32 v12, v7, v1 +; CHECK-NEXT: v_mul_lo_u32 v13, v10, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v11, vcc +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; CHECK-NEXT: v_mul_hi_u32 v1, v10, v1 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; CHECK-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v2, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v11, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v0 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v1, vcc +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2] +; CHECK-NEXT: v_ashrrev_i32_e32 v8, 31, v4 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2] +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v8 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v4, v8, vcc +; CHECK-NEXT: v_xor_b32_e32 v4, v2, v8 +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v0 +; CHECK-NEXT: v_mul_lo_u32 v9, v7, v1 +; CHECK-NEXT: v_mul_hi_u32 v0, v10, v0 +; CHECK-NEXT: v_xor_b32_e32 v3, v3, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v11, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v1 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v1 +; CHECK-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_mul_hi_u32 v1, v10, v1 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v0, vcc, v0, v9, vcc +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v10, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v3, v0 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v9, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v3, v0 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v8, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v12, v2 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v13, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v2 -; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v5, vcc -; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0 -; CHECK-NEXT: v_mov_b32_e32 v2, v6 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[2:3] -; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v9 -; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] -; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v4, v9, vcc -; CHECK-NEXT: v_xor_b32_e32 v7, v2, v9 -; CHECK-NEXT: v_mul_lo_u32 v2, v11, v5 -; CHECK-NEXT: v_mul_lo_u32 v4, v8, v6 -; CHECK-NEXT: v_xor_b32_e32 v10, v3, v9 -; CHECK-NEXT: v_mul_hi_u32 v3, v8, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v3, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; CHECK-NEXT: v_mul_hi_u32 v4, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_hi_u32 v5, v11, v6 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v11, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, v10, v2 -; CHECK-NEXT: v_mul_lo_u32 v5, v7, v3 -; CHECK-NEXT: v_mul_hi_u32 v6, v7, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v10, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, v10, v3 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, v10, v3 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_hi_u32 v5, v7, v3 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v2, v4 -; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v6, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4] -; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v6, v[3:4] -; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v10, v3, vcc -; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v10, v3 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v1 -; CHECK-NEXT: v_subb_u32_e32 v3, vcc, v3, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v2, v0 -; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v3, vcc -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v9, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v3, v1 +; CHECK-NEXT: v_mul_hi_u32 v9, v4, v1 +; CHECK-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v0, v7, vcc +; CHECK-NEXT: v_mul_hi_u32 v9, v3, v1 +; CHECK-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v7, 0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v7, v[1:2] +; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v3, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v3, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v6 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v5 +; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v5 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v1 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v6, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v6 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v9 -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v9 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v9 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v8 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v8 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc ; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6 ; CHECK-NEXT: ; implicit-def: $vgpr3 ; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] @@ -2381,21 +2124,16 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_hi_u32 v10, v16, v10 ; GISEL-NEXT: v_mul_lo_u32 v12, v13, v11 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v17, v16, v11 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; GISEL-NEXT: v_mul_hi_u32 v12, v13, v11 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v17, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v17, vcc +; GISEL-NEXT: v_mul_lo_u32 v4, v16, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v13, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 ; GISEL-NEXT: v_mul_hi_u32 v11, v16, v11 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v12, vcc +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v10, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v4 ; GISEL-NEXT: v_addc_u32_e32 v16, vcc, v16, v10, vcc @@ -2411,23 +2149,18 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_lo_u32 v14, v13, v11 ; GISEL-NEXT: v_xor_b32_e32 v15, v1, v4 ; GISEL-NEXT: v_mul_hi_u32 v1, v13, v10 -; GISEL-NEXT: v_mul_hi_u32 v10, v16, v10 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v16, v11 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v16, v11 +; GISEL-NEXT: v_mul_hi_u32 v1, v16, v10 ; GISEL-NEXT: v_mul_hi_u32 v14, v13, v11 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_mul_hi_u32 v11, v16, v11 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v14, vcc +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v10, vcc +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v16, v1, vcc @@ -2435,24 +2168,19 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_lo_u32 v11, v12, v1 ; GISEL-NEXT: v_mul_hi_u32 v13, v12, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 -; GISEL-NEXT: v_mul_hi_u32 v14, v15, v1 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v13, v15, v1 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v11, v12, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v0, v10 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v10, v13, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, v15, v1 +; GISEL-NEXT: v_mul_hi_u32 v13, v12, v1 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v13, vcc +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v0, v11, vcc +; GISEL-NEXT: v_mul_hi_u32 v11, v15, v1 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v13, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v10, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10 ; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v5, v10, v[1:2] ; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v7, v13, v[10:11] ; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v12, v0 @@ -2463,7 +2191,7 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v7 -; GISEL-NEXT: v_subb_u32_e32 v10, vcc, v0, v7, vcc +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, v0, v7, vcc ; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v13, v1, v6, s[4:5] ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v0 @@ -2472,64 +2200,58 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_xor_b32_e32 v8, v8, v0 ; GISEL-NEXT: v_cvt_f32_u32_e32 v0, v6 ; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v8 -; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v11, v5 -; GISEL-NEXT: v_subbrev_u32_e64 v15, s[4:5], 0, v10, vcc +; GISEL-NEXT: v_sub_i32_e32 v15, vcc, v11, v5 +; GISEL-NEXT: v_subbrev_u32_e64 v16, s[4:5], 0, v14, vcc ; GISEL-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v15, v7 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v16, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v15, v7 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v15, v5 ; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v16, v9, v1, s[4:5] ; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GISEL-NEXT: v_trunc_f32_e32 v9, v1 -; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v9 -; GISEL-NEXT: v_cvt_u32_f32_e32 v17, v0 -; GISEL-NEXT: v_sub_i32_e64 v18, s[4:5], 0, v6 -; GISEL-NEXT: v_subb_u32_e64 v19, s[4:5], 0, v8, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v18, v17, 0 -; GISEL-NEXT: v_cvt_u32_f32_e32 v20, v9 -; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v10, v7, vcc -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v18, v20, v[1:2] -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v14, v5 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v19, v17, v[9:10] -; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v7, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GISEL-NEXT: v_cndmask_b32_e32 v7, v14, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v20, v0 -; GISEL-NEXT: v_mul_lo_u32 v10, v17, v9 -; GISEL-NEXT: v_mul_hi_u32 v14, v17, v0 +; GISEL-NEXT: v_trunc_f32_e32 v17, v1 +; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v17 +; GISEL-NEXT: v_cvt_u32_f32_e32 v18, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v19, s[4:5], 0, v6 +; GISEL-NEXT: v_subb_u32_e64 v20, s[4:5], 0, v8, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v18, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v17, v17 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v16, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v21, v9, v10, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v19, v17, v[1:2] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v14, v7, vcc +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v20, v18, v[9:10] +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v15, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v17, v0 +; GISEL-NEXT: v_mul_lo_u32 v10, v18, v9 +; GISEL-NEXT: v_mul_hi_u32 v14, v18, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v17, v0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v1, v10 +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v1, v14, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v1, v17, v9 +; GISEL-NEXT: v_mul_hi_u32 v14, v18, v9 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v1, v0 +; GISEL-NEXT: v_mul_hi_u32 v9, v17, v9 +; GISEL-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v14, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v10, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v9, v1 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v18, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v15, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v0, v20, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v20, v9 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GISEL-NEXT: v_mul_hi_u32 v10, v17, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10 -; GISEL-NEXT: v_mul_hi_u32 v9, v20, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v0 -; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v20, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v18, v14, 0 +; GISEL-NEXT: v_addc_u32_e64 v15, s[4:5], v17, v1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v14, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; GISEL-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v18, v15, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v7, v4 +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v19, v15, v[1:2] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v19, v14, v[9:10] -; GISEL-NEXT: v_cndmask_b32_e32 v5, v12, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v20, v14, v[9:10] +; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 ; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc ; GISEL-NEXT: v_xor_b32_e32 v11, v2, v7 @@ -2539,21 +2261,16 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_hi_u32 v3, v14, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v15, v9 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v10, v2 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v15, v9 ; GISEL-NEXT: v_mul_hi_u32 v10, v14, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v10 +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GISEL-NEXT: v_mul_hi_u32 v9, v15, v9 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v3, vcc +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 ; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v15, v2, vcc @@ -2561,29 +2278,25 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_mul_lo_u32 v9, v11, v2 ; GISEL-NEXT: v_mul_hi_u32 v10, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_xor_b32_e32 v5, v5, v4 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v9, v3 -; GISEL-NEXT: v_mul_hi_u32 v9, v11, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v0, v3 -; GISEL-NEXT: v_mul_hi_u32 v10, v12, v2 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v2 +; GISEL-NEXT: v_mul_hi_u32 v10, v11, v2 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v0, v9, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v9, v12, v2 ; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v13, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v0 +; GISEL-NEXT: v_xor_b32_e32 v5, v5, v4 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v3 ; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v9, v[0:1] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v5, v4, vcc +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v5, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v13, v[9:10] ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 ; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc @@ -2636,134 +2349,116 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_2 ; CGP-NEXT: ; %bb.1: -; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v12 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v11, v1 -; CGP-NEXT: v_addc_u32_e32 v4, vcc, v12, v1, vcc -; CGP-NEXT: v_xor_b32_e32 v0, v0, v1 -; CGP-NEXT: v_xor_b32_e32 v1, v4, v1 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, v0 -; CGP-NEXT: v_cvt_f32_u32_e32 v10, v1 -; CGP-NEXT: v_sub_i32_e32 v14, vcc, 0, v0 -; CGP-NEXT: v_subb_u32_e32 v15, vcc, 0, v1, vcc -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v10 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v12, v10 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v12 -; CGP-NEXT: v_cvt_u32_f32_e32 v13, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v16, v12 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0 -; CGP-NEXT: v_mov_b32_e32 v4, v11 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5] -; CGP-NEXT: v_mul_lo_u32 v4, v16, v10 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] -; CGP-NEXT: v_mul_hi_u32 v12, v13, v10 +; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v12 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v12, v0, vcc +; CGP-NEXT: v_xor_b32_e32 v12, v1, v0 +; CGP-NEXT: v_xor_b32_e32 v4, v4, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v12 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CGP-NEXT: v_sub_i32_e32 v14, vcc, 0, v12 +; CGP-NEXT: v_subb_u32_e32 v15, vcc, 0, v4, vcc +; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CGP-NEXT: v_trunc_f32_e32 v10, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v10 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v16, v10 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v14, v13, 0 +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v16, v[1:2] +; CGP-NEXT: v_mul_lo_u32 v1, v16, v0 +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v15, v13, v[10:11] +; CGP-NEXT: v_mul_hi_u32 v11, v13, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v16, v0 +; CGP-NEXT: v_mul_lo_u32 v17, v13, v10 +; CGP-NEXT: v_mul_lo_u32 v18, v16, v10 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v17 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v11, vcc +; CGP-NEXT: v_mul_hi_u32 v11, v13, v10 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v18, v0 ; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 -; CGP-NEXT: v_mul_lo_u32 v17, v13, v11 -; CGP-NEXT: v_mul_lo_u32 v18, v16, v11 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v17 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; CGP-NEXT: v_mul_hi_u32 v12, v13, v11 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v17, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v18, v10 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12 -; CGP-NEXT: v_mul_hi_u32 v11, v16, v11 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v4 -; CGP-NEXT: v_addc_u32_e32 v16, vcc, v16, v10, vcc -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0 -; CGP-NEXT: v_mov_b32_e32 v4, v11 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5] +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v11, vcc +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v1, vcc +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v11, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v0 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, v16, v1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v14, v13, 0 +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v16, v[1:2] ; CGP-NEXT: v_ashrrev_i32_e32 v14, 31, v9 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v14 -; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] +; CGP-NEXT: v_add_i32_e32 v1, vcc, v8, v14 +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v15, v13, v[10:11] ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v14, vcc -; CGP-NEXT: v_xor_b32_e32 v12, v4, v14 -; CGP-NEXT: v_mul_lo_u32 v4, v16, v10 -; CGP-NEXT: v_mul_lo_u32 v9, v13, v11 +; CGP-NEXT: v_xor_b32_e32 v11, v1, v14 +; CGP-NEXT: v_mul_lo_u32 v1, v16, v0 +; CGP-NEXT: v_mul_lo_u32 v9, v13, v10 ; CGP-NEXT: v_xor_b32_e32 v15, v8, v14 -; CGP-NEXT: v_mul_hi_u32 v8, v13, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v16, v11 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v13, v11 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_mul_hi_u32 v8, v13, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v16, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v16, v10 +; CGP-NEXT: v_mul_hi_u32 v9, v13, v10 +; CGP-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v9, v16, v10 +; CGP-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v0, vcc, v0, v8, vcc +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v16, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v8, v15, v0 +; CGP-NEXT: v_mul_lo_u32 v9, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v10, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v15, v0 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v16, v11 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v10, vcc +; CGP-NEXT: v_mul_lo_u32 v8, v15, v1 +; CGP-NEXT: v_mul_hi_u32 v10, v11, v1 +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v10, vcc +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v0, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v9, v15, v1 +; CGP-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v10, 0 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v16, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v15, v4 -; CGP-NEXT: v_mul_lo_u32 v10, v12, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v12, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v15, v4 -; CGP-NEXT: v_mul_hi_u32 v13, v15, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v15, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v12, v8 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v4, v9 -; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v11, 0 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v4 -; CGP-NEXT: v_mov_b32_e32 v4, v9 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v0, v10, v[4:5] -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v12, v8 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v1, v11, v[9:10] -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v15, v9, vcc -; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v15, v9 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v8, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v10, v[8:9] +; CGP-NEXT: v_subb_u32_e64 v1, s[4:5], v15, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v8, s[4:5], v15, v8 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v12 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v0 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v1 -; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v11, vcc, v4, v0 -; CGP-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v9, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v1 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v4 +; CGP-NEXT: v_subb_u32_e32 v8, vcc, v8, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v0, v12 +; CGP-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v8, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v4 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v1 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v9, v1, vcc -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v4 +; CGP-NEXT: v_subb_u32_e32 v4, vcc, v8, v4, vcc +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v13, v13, v15, s[4:5] -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v12, v1, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v8, v10, v8, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v4, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v0, v14 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v14 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v14 @@ -2810,137 +2505,120 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: s_or_b64 exec, exec, s[4:5] ; CGP-NEXT: s_setpc_b64 s[30:31] ; CGP-NEXT: .LBB8_7: -; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v10 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v9, v3 -; CGP-NEXT: v_addc_u32_e32 v4, vcc, v10, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v3 -; CGP-NEXT: v_xor_b32_e32 v3, v4, v3 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 -; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v2 -; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v3, vcc -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v6, v6 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v11, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0 -; CGP-NEXT: v_mov_b32_e32 v4, v9 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v6, v[4:5] -; CGP-NEXT: v_mul_lo_u32 v4, v6, v8 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10] -; CGP-NEXT: v_mul_hi_u32 v10, v11, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 -; CGP-NEXT: v_mul_lo_u32 v14, v11, v9 -; CGP-NEXT: v_mul_lo_u32 v15, v6, v9 +; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v10 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v2 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v10, v2, vcc +; CGP-NEXT: v_xor_b32_e32 v6, v3, v2 +; CGP-NEXT: v_xor_b32_e32 v8, v4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 +; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v8, vcc +; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CGP-NEXT: v_trunc_f32_e32 v4, v3 +; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v9, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v12, v4 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v9, 0 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4] +; CGP-NEXT: v_mul_hi_u32 v13, v9, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v9, v[3:4] +; CGP-NEXT: v_mul_lo_u32 v4, v12, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v14, v9, v3 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v3 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v11, v9 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v14, v4 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v14, v10 -; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v4 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc -; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0 -; CGP-NEXT: v_mov_b32_e32 v4, v9 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v6, v[4:5] -; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v12 -; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10] -; CGP-NEXT: v_addc_u32_e32 v5, vcc, v7, v12, vcc -; CGP-NEXT: v_xor_b32_e32 v7, v4, v12 -; CGP-NEXT: v_mul_lo_u32 v4, v6, v8 -; CGP-NEXT: v_mul_lo_u32 v10, v11, v9 -; CGP-NEXT: v_xor_b32_e32 v13, v5, v12 -; CGP-NEXT: v_mul_hi_u32 v5, v11, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v5, v6, v9 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_mul_hi_u32 v10, v11, v9 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc +; CGP-NEXT: v_mul_hi_u32 v13, v9, v3 +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v15, v2 +; CGP-NEXT: v_mul_hi_u32 v3, v12, v3 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], v2, v13, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v13, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v2 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v3, vcc +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v9, 0 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4] +; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v7 +; CGP-NEXT: v_mul_hi_u32 v13, v9, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v9, v[3:4] +; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v10 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v7, v10, vcc +; CGP-NEXT: v_xor_b32_e32 v7, v4, v10 +; CGP-NEXT: v_mul_lo_u32 v4, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v11, v9, v3 +; CGP-NEXT: v_mul_hi_u32 v2, v12, v2 +; CGP-NEXT: v_xor_b32_e32 v5, v5, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v12, v3 +; CGP-NEXT: v_mul_hi_u32 v13, v9, v3 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; CGP-NEXT: v_mul_hi_u32 v3, v12, v3 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], v2, v13, vcc +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v12, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v5, v2 +; CGP-NEXT: v_mul_lo_u32 v9, v7, v3 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v5, v2 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v11, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v5, v3 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v3 +; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; CGP-NEXT: v_addc_u32_e64 v2, s[4:5], v2, v11, vcc +; CGP-NEXT: v_addc_u32_e32 v9, vcc, v2, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v11, v5, v3 +; CGP-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v9, 0 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc -; CGP-NEXT: v_mul_lo_u32 v6, v13, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v7, v5 -; CGP-NEXT: v_mul_hi_u32 v9, v7, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v13, v4 -; CGP-NEXT: v_mul_hi_u32 v10, v13, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v13, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v4, v6 -; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v9, 0 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[5:6] -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v7, v4 -; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v9, v[5:6] -; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v13, v5, vcc -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v4, v[3:4] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v9, v[3:4] +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v5, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v5, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v8 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v6, v3 -; CGP-NEXT: v_subb_u32_e32 v5, vcc, v5, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v4, v2 -; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v5, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v3 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v8 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v6 +; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v8 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v3 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v5, v3, vcc -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2 -; CGP-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v8 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, v7, v6 +; CGP-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5] ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v12 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v12 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v12, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v10 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v10 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; CGP-NEXT: ; implicit-def: $vgpr5 ; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] @@ -3036,223 +2714,189 @@ define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v4 ; GISEL-NEXT: v_add_i32_e64 v3, s[4:5], 0, 0 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, 0, v1 -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v1 -; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v3 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v1 +; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v3 ; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v1 ; GISEL-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc -; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; GISEL-NEXT: v_trunc_f32_e32 v9, v7 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v9 -; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v9 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0 -; GISEL-NEXT: v_mov_b32_e32 v5, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6] -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v7 -; GISEL-NEXT: v_mul_hi_u32 v14, v10, v7 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9] +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v7, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v7 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v13, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v5, v13, v4 +; GISEL-NEXT: v_mul_hi_u32 v14, v10, v4 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v10, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v4, v13, v4 +; GISEL-NEXT: v_mul_lo_u32 v8, v10, v7 +; GISEL-NEXT: v_mul_lo_u32 v15, v13, v7 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v10, v7 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v14, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4 ; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7 -; GISEL-NEXT: v_mul_lo_u32 v9, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 -; GISEL-NEXT: v_mul_hi_u32 v9, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 -; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v8, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v5 -; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v7, vcc -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0 -; GISEL-NEXT: v_mov_b32_e32 v5, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6] -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v7 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v4 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v13, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v5, v13, v4 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, 0, v0 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9] -; GISEL-NEXT: v_mul_hi_u32 v0, v10, v7 -; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7 -; GISEL-NEXT: v_mul_lo_u32 v9, v10, v8 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v10, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v0, v10, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v13, v4 +; GISEL-NEXT: v_mul_lo_u32 v8, v10, v7 ; GISEL-NEXT: v_and_b32_e32 v12, 0xffffff, v2 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v5, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_mul_hi_u32 v9, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v5, v0, vcc +; GISEL-NEXT: v_mul_lo_u32 v0, v13, v7 +; GISEL-NEXT: v_mul_hi_u32 v8, v10, v7 +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v13, v5, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v3, v0 -; GISEL-NEXT: v_mul_lo_u32 v8, v11, v5 -; GISEL-NEXT: v_mul_hi_u32 v9, v11, v0 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v13, v4, vcc +; GISEL-NEXT: v_mul_lo_u32 v5, v3, v0 +; GISEL-NEXT: v_mul_lo_u32 v7, v11, v4 +; GISEL-NEXT: v_mul_hi_u32 v8, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v3, v0 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v11, v5 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v7 -; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v1, v10, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v0 -; GISEL-NEXT: v_mov_b32_e32 v0, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v1, v5, v[0:1] -; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v6 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v11, v7 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v10, v[8:9] -; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v5, vcc -; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v3, v5 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], 0, v0 -; GISEL-NEXT: v_cvt_f32_u32_e32 v0, v2 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v9, v6, v9, s[4:5] -; GISEL-NEXT: v_subb_u32_e32 v10, vcc, v5, v3, vcc -; GISEL-NEXT: v_mac_f32_e32 v0, 0x4f800000, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v7, v1 -; GISEL-NEXT: v_subbrev_u32_e64 v13, s[4:5], 0, v10, vcc -; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x2f800000, v0 -; GISEL-NEXT: v_trunc_f32_e32 v6, v4 -; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0 -; GISEL-NEXT: v_sub_i32_e64 v15, s[4:5], 0, v2 -; GISEL-NEXT: v_subb_u32_e64 v16, s[4:5], 0, v3, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v15, v14, 0 -; GISEL-NEXT: v_cvt_u32_f32_e32 v17, v6 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v13, v3 -; GISEL-NEXT: v_mov_b32_e32 v0, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, -1, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v15, v17, v[0:1] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v16, v14, v[5:6] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v13, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v6, v18, v0, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v0, v17, v4 -; GISEL-NEXT: v_mul_lo_u32 v18, v14, v5 -; GISEL-NEXT: v_mul_hi_u32 v19, v14, v4 -; GISEL-NEXT: v_subb_u32_e32 v10, vcc, v10, v3, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v19, v17, v5 -; GISEL-NEXT: v_mul_hi_u32 v4, v17, v4 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v18, v0 -; GISEL-NEXT: v_mul_hi_u32 v18, v14, v5 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v19, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18 -; GISEL-NEXT: v_mul_hi_u32 v5, v17, v5 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v18, v4 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v0 -; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v17, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v15, v14, 0 -; GISEL-NEXT: v_sub_i32_e32 v18, vcc, v11, v1 -; GISEL-NEXT: v_mov_b32_e32 v0, v5 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v15, v17, v[0:1] -; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v10, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v16, v14, v[0:1] -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v18, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v6, v13, v10, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc -; GISEL-NEXT: v_mul_lo_u32 v5, v17, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, v14, v0 -; GISEL-NEXT: v_mul_hi_u32 v10, v14, v4 -; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], 0, v12 -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v10, v17, v0 -; GISEL-NEXT: v_mul_hi_u32 v4, v17, v4 -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v7, v5 -; GISEL-NEXT: v_mul_hi_u32 v7, v14, v0 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v10, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v10, v7 -; GISEL-NEXT: v_mul_hi_u32 v0, v17, v0 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v7, v5 -; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v0, v5 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v14, v4 -; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v17, v0, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc ; GISEL-NEXT: v_mul_lo_u32 v5, v3, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, v9, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v6, vcc -; GISEL-NEXT: v_mul_hi_u32 v6, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v8, v11, v4 +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v0, v7, vcc +; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v5, vcc +; GISEL-NEXT: v_mul_hi_u32 v7, v3, v4 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v10, 0 +; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], 0, v2 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v5 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v1, v7, v[0:1] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v10, v[7:8] +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v11, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 +; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], v3, v5, vcc +; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v3, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v3 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v8, v5, v8, s[4:5] +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v9, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v9 +; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v4 +; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], 0, v2 +; GISEL-NEXT: v_subb_u32_e64 v13, s[4:5], 0, v3, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, v6, v3, vcc +; GISEL-NEXT: v_sub_i32_e32 v15, vcc, v7, v1 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v4 +; GISEL-NEXT: v_subbrev_u32_e64 v16, s[4:5], 0, v14, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v10, v[5:6] +; GISEL-NEXT: v_mul_hi_u32 v20, v10, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v16, v3 +; GISEL-NEXT: v_mul_lo_u32 v18, v10, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v15, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, -1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v18 +; GISEL-NEXT: v_addc_u32_e64 v17, s[4:5], v17, v20, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v20, v10, v5 +; GISEL-NEXT: v_addc_u32_e64 v18, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v17, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[6:7], v4, v20, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v17, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v18, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v17, s[4:5], 0, v17, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v17 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v4 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], v9, v5, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v16, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v17, v6, v19, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6] +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, v14, v3, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v10, v[5:6] +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v15, v1 +; GISEL-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v14, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v16, v11, vcc +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 +; GISEL-NEXT: v_mul_lo_u32 v13, v10, v5 +; GISEL-NEXT: v_mul_hi_u32 v14, v10, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v15, v1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, 0, v12 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v14, vcc +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v14, v10, v5 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v4, v14, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v9, v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v3, v4 +; GISEL-NEXT: v_mul_lo_u32 v10, v12, v5 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GISEL-NEXT: v_mul_hi_u32 v7, v12, v4 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v10 ; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v6, v3, v0 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_mul_hi_u32 v7, v9, v0 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v4, v5 -; GISEL-NEXT: v_mul_hi_u32 v0, v3, v0 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v7, 0 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v0, v6 +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], v8, v7, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v7, v3, v5 +; GISEL-NEXT: v_mul_hi_u32 v9, v12, v5 +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v7, v4 +; GISEL-NEXT: v_addc_u32_e64 v4, s[6:7], v4, v9, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], v4, v8, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v7, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v9, v3, v5 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v10, v0, v6, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v7 ; GISEL-NEXT: v_mov_b32_e32 v0, v5 ; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[0:1] ; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0, v1 -; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v7, v[5:6] -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v8, vcc -; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v9, v4 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v8, v[5:6] +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v10, vcc +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v12, v4 ; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v5, vcc ; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v3, v5 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll index d155513..b152c06 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -16,10 +16,10 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v2 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CHECK-NEXT: s_cbranch_execnz .LBB0_3 ; CHECK-NEXT: ; %bb.1: ; %Flow -; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CHECK-NEXT: s_cbranch_execnz .LBB0_4 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] @@ -50,18 +50,13 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8 ; CHECK-NEXT: v_mul_hi_u32 v8, v6, v8 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v10, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v9, vcc, v9, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9 ; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc @@ -79,18 +74,13 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v6, v1 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v8 +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v8, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v7, vcc, v7, v8, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v6, v1, vcc @@ -102,39 +92,34 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_mul_hi_u32 v10, v4, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v5, v1 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_mul_lo_u32 v7, v2, v0 -; CHECK-NEXT: v_mul_lo_u32 v8, v3, v0 -; CHECK-NEXT: v_mul_hi_u32 v9, v2, v0 +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CHECK-NEXT: v_add_i32_e64 v0, s[4:5], v9, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v6, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v0, vcc, v0, v6, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v7, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 -; CHECK-NEXT: v_mul_lo_u32 v6, v2, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, v2, v0 +; CHECK-NEXT: v_mul_lo_u32 v7, v3, v0 +; CHECK-NEXT: v_mul_hi_u32 v8, v2, v0 +; CHECK-NEXT: v_mul_lo_u32 v9, v2, v1 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v0 ; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v10 ; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v5, v6, vcc -; CHECK-NEXT: v_sub_i32_e64 v5, s[4:5], v5, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v5, v7, vcc +; CHECK-NEXT: v_sub_i32_e64 v5, s[4:5], v5, v7 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] ; CHECK-NEXT: v_subb_u32_e32 v5, vcc, v5, v3, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v6, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc ; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v2 ; CHECK-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2 @@ -144,7 +129,7 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v5, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v3, v11, v12, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc @@ -152,7 +137,7 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) { ; CHECK-NEXT: ; implicit-def: $vgpr6 ; CHECK-NEXT: ; implicit-def: $vgpr2 ; CHECK-NEXT: ; implicit-def: $vgpr4 -; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CHECK-NEXT: s_cbranch_execz .LBB0_2 ; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v6 @@ -197,10 +182,10 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: v_mov_b32_e32 v0, s3 ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3 -; CHECK-NEXT: s_sub_u32 s4, 0, s2 +; CHECK-NEXT: s_sub_u32 s10, 0, s2 ; CHECK-NEXT: v_mov_b32_e32 v3, s1 ; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2 -; CHECK-NEXT: s_subb_u32 s5, 0, s3 +; CHECK-NEXT: s_subb_u32 s11, 0, s3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 ; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1 @@ -208,10 +193,10 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v1 -; CHECK-NEXT: v_mul_lo_u32 v7, s5, v1 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v5, s10, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s10, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s11, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, s10, v1 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v6 @@ -222,25 +207,20 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v10, v6 +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], v6, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v7, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v6, vcc, v6, v7, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v1 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v1 -; CHECK-NEXT: v_mul_hi_u32 v7, s4, v1 -; CHECK-NEXT: v_mul_lo_u32 v8, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v5, s10, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s11, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s10, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, s10, v4 ; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 ; CHECK-NEXT: v_mul_hi_u32 v10, v1, v5 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 @@ -251,18 +231,13 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6 ; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v10, vcc +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 +; CHECK-NEXT: v_addc_u32_e64 v5, s[6:7], v5, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v7, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v5, vcc, v5, v7, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc @@ -274,47 +249,42 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v9, s0, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, s1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_lo_u32 v6, s2, v1 -; CHECK-NEXT: v_mul_lo_u32 v7, s3, v1 -; CHECK-NEXT: v_mul_hi_u32 v8, s2, v1 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v1 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc +; CHECK-NEXT: v_add_i32_e64 v1, s[4:5], v8, v1 +; CHECK-NEXT: v_addc_u32_e64 v1, s[6:7], v1, v9, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v5, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v1, vcc, v1, v5, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v9 +; CHECK-NEXT: v_mul_lo_u32 v5, s2, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s3, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s2, v1 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v1 ; CHECK-NEXT: v_mul_lo_u32 v4, s2, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, s0, v6 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v8 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, s0, v5 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], s1, v4 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v4, v0, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v8, v7, vcc -; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s2, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v7, v6, vcc +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s2, v5 ; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v9, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; CHECK-NEXT: s_mov_b32 s4, 0 @@ -368,233 +338,203 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v4 ; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v5 ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v4 -; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v6 -; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v7 +; GISEL-NEXT: v_cvt_f32_u32_e32 v12, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v7 ; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], 0, v6 -; GISEL-NEXT: v_subb_u32_e32 v15, vcc, 0, v5, vcc -; GISEL-NEXT: v_subb_u32_e64 v12, vcc, 0, v7, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, 0, v5, vcc +; GISEL-NEXT: v_subb_u32_e64 v15, vcc, 0, v7, s[4:5] ; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 -; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14 +; GISEL-NEXT: v_mac_f32_e32 v12, 0x4f800000, v13 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v13 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v12 ; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11 -; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v10 -; GISEL-NEXT: v_mul_f32_e32 v14, 0x2f800000, v11 +; GISEL-NEXT: v_mul_f32_e32 v12, 0x2f800000, v10 +; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v11 +; GISEL-NEXT: v_trunc_f32_e32 v12, v12 ; GISEL-NEXT: v_trunc_f32_e32 v13, v13 -; GISEL-NEXT: v_trunc_f32_e32 v14, v14 -; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v13 +; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v12 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v12 +; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v13 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 -; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v14 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v14 ; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 -; GISEL-NEXT: v_mul_lo_u32 v16, v8, v13 +; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v14 -; GISEL-NEXT: v_mul_lo_u32 v18, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v19, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 -; GISEL-NEXT: v_mul_lo_u32 v19, v14, v18 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_mul_hi_u32 v20, v11, v18 -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v19, v20 -; GISEL-NEXT: v_mul_lo_u32 v19, v8, v10 -; GISEL-NEXT: v_mul_lo_u32 v20, v15, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v20, v16 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v19, v14, v10 ; GISEL-NEXT: v_mul_hi_u32 v20, v8, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16 +; GISEL-NEXT: v_mul_lo_u32 v19, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 +; GISEL-NEXT: v_mul_lo_u32 v20, v10, v16 +; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v20, v10, v18 +; GISEL-NEXT: v_addc_u32_e32 v19, vcc, v19, v20, vcc +; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v20, v15, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v20, v17 +; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v20 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v19 -; GISEL-NEXT: v_mul_lo_u32 v21, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_mul_hi_u32 v21, v10, v19 -; GISEL-NEXT: v_add_i32_e64 v20, s[8:9], v20, v21 +; GISEL-NEXT: v_mul_lo_u32 v21, v11, v17 +; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v20, v21 +; GISEL-NEXT: v_mul_hi_u32 v21, v11, v19 +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], v20, v21, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 ; GISEL-NEXT: v_mul_hi_u32 v19, v13, v19 -; GISEL-NEXT: v_mul_hi_u32 v18, v14, v18 -; GISEL-NEXT: v_mul_lo_u32 v20, v13, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[12:13], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v12, v16 +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v13, v17 +; GISEL-NEXT: v_add_i32_e64 v19, s[8:9], v20, v19 ; GISEL-NEXT: v_mul_hi_u32 v20, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[14:15], v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v16, v12, v16 +; GISEL-NEXT: v_addc_u32_e64 v18, s[10:11], v18, v20, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[16:17], v18, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[10:11] -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[14:15] -; GISEL-NEXT: v_add_i32_e64 v21, s[6:7], v21, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[16:17] -; GISEL-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v22 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v19 -; GISEL-NEXT: v_mul_hi_u32 v16, v13, v16 -; GISEL-NEXT: v_mul_hi_u32 v17, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v21, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v23, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v8, v10 -; GISEL-NEXT: v_mul_lo_u32 v15, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18 -; GISEL-NEXT: v_mul_hi_u32 v18, v8, v10 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 -; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v12, v12, v11 -; GISEL-NEXT: v_addc_u32_e64 v13, vcc, v13, v16, s[6:7] +; GISEL-NEXT: v_mul_hi_u32 v17, v13, v17 +; GISEL-NEXT: v_addc_u32_e64 v19, s[12:13], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, v18, v20, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v19, s[4:5], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[6:7], 0, 0, s[10:11] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v18 +; GISEL-NEXT: v_addc_u32_e64 v18, s[8:9], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v20, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, 0, v18, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v19 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v20 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v14, v14, v10 +; GISEL-NEXT: v_mul_hi_u32 v19, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v20, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v15, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v12, v16, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11 -; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v17, s[8:9] -; GISEL-NEXT: v_mul_hi_u32 v17, v10, v20 -; GISEL-NEXT: v_mul_lo_u32 v8, v8, v13 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v15, v8 -; GISEL-NEXT: v_mul_hi_u32 v15, v11, v19 -; GISEL-NEXT: v_mul_lo_u32 v9, v9, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9 -; GISEL-NEXT: v_mul_lo_u32 v12, v13, v20 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v17, v10, v18 +; GISEL-NEXT: v_mul_lo_u32 v8, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, v11, v20 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v12, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v19 +; GISEL-NEXT: v_mul_lo_u32 v19, v13, v20 ; GISEL-NEXT: v_mul_hi_u32 v20, v13, v20 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v14, v19 -; GISEL-NEXT: v_mul_hi_u32 v19, v14, v19 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16 ; GISEL-NEXT: v_mul_lo_u32 v16, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; GISEL-NEXT: v_mul_lo_u32 v16, v13, v8 -; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 -; GISEL-NEXT: v_mul_hi_u32 v12, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v8 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8 ; GISEL-NEXT: v_mul_lo_u32 v17, v11, v9 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17 -; GISEL-NEXT: v_mul_hi_u32 v18, v11, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v16, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[10:11] -; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v19, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v19 -; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17 -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v18 -; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v19 -; GISEL-NEXT: v_mul_lo_u32 v16, v1, v10 -; GISEL-NEXT: v_mul_hi_u32 v17, v0, v10 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; GISEL-NEXT: v_mul_hi_u32 v19, v11, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v18, v20 +; GISEL-NEXT: v_addc_u32_e64 v15, s[10:11], v16, v15, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v17, v14, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v14, s[12:13], v18, v19, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v16, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, v15, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v14, v18, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, vcc, 0, v19, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v16 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v17 +; GISEL-NEXT: v_mul_lo_u32 v14, v1, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v0, v10 ; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v19, v2, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v3, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v2, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v12 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v13, v8, vcc -; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v14, v9, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v12, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v13, v9, s[4:5] ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v8 ; GISEL-NEXT: v_mul_lo_u32 v13, v1, v8 -; GISEL-NEXT: v_mul_hi_u32 v14, v0, v8 -; GISEL-NEXT: v_mul_hi_u32 v15, v1, v8 +; GISEL-NEXT: v_mul_hi_u32 v18, v0, v8 +; GISEL-NEXT: v_mul_hi_u32 v19, v1, v8 ; GISEL-NEXT: v_mul_lo_u32 v8, v2, v9 ; GISEL-NEXT: v_mul_lo_u32 v20, v3, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12 -; GISEL-NEXT: v_mul_hi_u32 v16, v2, v9 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v9 ; GISEL-NEXT: v_mul_hi_u32 v21, v3, v9 ; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v13, v10 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v18, v8 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v16, v8 ; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v20, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v14 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v19 -; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v10, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v14 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v16 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v10, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v14, v4, v8 -; GISEL-NEXT: v_mul_lo_u32 v16, v5, v8 -; GISEL-NEXT: v_mul_hi_u32 v17, v4, v8 -; GISEL-NEXT: v_mul_lo_u32 v18, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v19, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v11, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v8 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v14 -; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], 1, v9 -; GISEL-NEXT: v_sub_i32_e64 v2, s[8:9], v2, v18 -; GISEL-NEXT: v_add_i32_e64 v18, s[10:11], 1, v13 -; GISEL-NEXT: v_add_i32_e64 v10, s[12:13], v15, v10 -; GISEL-NEXT: v_add_i32_e64 v15, s[12:13], 1, v14 -; GISEL-NEXT: v_add_i32_e64 v12, s[14:15], v21, v12 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v12, v15, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[10:11], v9, v18, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v8, s[6:7], v8, v17, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v10, s[12:13], v10, v14, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v8, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v8, vcc, v9, v8, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], v10, v12, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, vcc, 0, v13, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v13, v5, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v15, v6, v9 +; GISEL-NEXT: v_mul_lo_u32 v16, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v17, v6, v9 +; GISEL-NEXT: v_add_i32_e32 v18, vcc, 1, v8 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v19, v10 +; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], 1, v9 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v21, v11 +; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], 1, v18 +; GISEL-NEXT: v_sub_i32_e64 v0, s[8:9], v0, v12 +; GISEL-NEXT: v_add_i32_e64 v12, s[10:11], 1, v19 +; GISEL-NEXT: v_sub_i32_e64 v2, s[12:13], v2, v15 +; GISEL-NEXT: v_mul_lo_u32 v15, v4, v10 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[14:15], v0, v4 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[16:17], v2, v6 -; GISEL-NEXT: v_sub_i32_e64 v0, s[18:19], v0, v4 -; GISEL-NEXT: v_sub_i32_e64 v2, s[20:21], v2, v6 -; GISEL-NEXT: v_mul_lo_u32 v20, v4, v10 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[22:23], v0, v4 +; GISEL-NEXT: v_sub_i32_e64 v0, s[16:17], v0, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[18:19], v0, v4 ; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v10, vcc -; GISEL-NEXT: v_mul_lo_u32 v4, v6, v12 +; GISEL-NEXT: v_mul_lo_u32 v4, v6, v11 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, 0, v11, s[4:5] ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 -; GISEL-NEXT: v_addc_u32_e64 v2, s[6:7], 0, v12, s[6:7] +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v6 +; GISEL-NEXT: v_add_i32_e64 v4, s[20:21], v16, v4 +; GISEL-NEXT: v_addc_u32_e64 v16, s[6:7], 0, v0, s[6:7] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v6 +; GISEL-NEXT: v_addc_u32_e64 v2, s[10:11], 0, v15, s[10:11] ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[14:15] -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, -1, s[16:17] -; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v19, v4 -; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, v0, s[10:11] -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17 -; GISEL-NEXT: v_addc_u32_e64 v17, s[6:7], 0, v2, s[12:13] -; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v4, v11 -; GISEL-NEXT: v_subb_u32_e64 v11, s[6:7], v1, v16, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v16 -; GISEL-NEXT: v_subb_u32_e64 v16, s[6:7], v3, v4, s[8:9] -; GISEL-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[22:23] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v11, v5 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[10:11], v11, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; GISEL-NEXT: v_subb_u32_e64 v1, vcc, v1, v5, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v16, v7 -; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v7, s[8:9] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v16, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v6, v16, v6, s[10:11] -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc -; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[18:19] -; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[20:21] -; GISEL-NEXT: v_cndmask_b32_e64 v16, v16, v20, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[10:11], v13, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v17 +; GISEL-NEXT: v_subb_u32_e64 v17, vcc, v1, v13, s[8:9] +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v13 +; GISEL-NEXT: v_subb_u32_e64 v13, vcc, v3, v4, s[12:13] +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[18:19] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v17, v5 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[10:11], v17, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[6:7] +; GISEL-NEXT: v_subb_u32_e64 v1, s[6:7], v1, v5, s[8:9] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v7 +; GISEL-NEXT: v_subb_u32_e64 v3, s[8:9], v3, v7, s[12:13] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v13, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v6, v13, v6, s[10:11] +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7] +; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[16:17] +; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[8:9] ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v7 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, v5 @@ -602,19 +542,19 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v16 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v17, s[8:9] ; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v1 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v1, v13, v18, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v14, v15, s[8:9] -; GISEL-NEXT: v_cndmask_b32_e64 v4, v0, v19, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v5, v2, v17, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v18, v20, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v19, v12, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v4, v0, v16, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v5, v15, v2, s[8:9] ; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v3, s[4:5] ; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v4, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v12, v5, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v11, v5, s[4:5] ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_udiv_v2i64: @@ -630,7 +570,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v4 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB2_2 ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v5 @@ -658,18 +598,13 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 ; CGP-NEXT: v_mul_hi_u32 v12, v2, v12 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v16, vcc +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v17, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v13, v18, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v14, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v15, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v13, vcc, v13, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v15, vcc ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 ; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v12, vcc @@ -687,18 +622,13 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v16, v0, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v2, v1 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v15, vcc +; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v13, v12 +; CGP-NEXT: v_addc_u32_e64 v3, s[6:7], v3, v16, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v12, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v3, vcc, v3, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v3 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc @@ -710,49 +640,44 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v14, v10, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v11, v1 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v3 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_mul_lo_u32 v3, v4, v0 -; CGP-NEXT: v_mul_lo_u32 v12, v5, v0 -; CGP-NEXT: v_mul_hi_u32 v13, v4, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; CGP-NEXT: v_add_i32_e64 v0, s[4:5], v13, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v2, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v3, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; CGP-NEXT: v_mul_lo_u32 v2, v4, v1 +; CGP-NEXT: v_mul_lo_u32 v2, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v3, v5, v0 +; CGP-NEXT: v_mul_hi_u32 v12, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v13, v4, v1 ; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v0 ; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v14 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v13 +; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v14 ; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13 -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v10, v3 -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v11, v2, vcc -; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v11, v2 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v12 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v10, v2 +; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v11, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v11, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] -; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v5 -; CGP-NEXT: v_cndmask_b32_e32 v10, v13, v11, vcc -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 +; CGP-NEXT: v_cndmask_b32_e32 v10, v12, v11, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v5 -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v14, v12, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v14, v13, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v15, v16, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc @@ -761,7 +686,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr10 ; CGP-NEXT: .LBB2_2: ; %Flow1 -; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CGP-NEXT: s_cbranch_execz .LBB2_4 ; CGP-NEXT: ; %bb.3: ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v2 @@ -791,10 +716,10 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execnz .LBB2_7 ; CGP-NEXT: ; %bb.5: ; %Flow -; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CGP-NEXT: s_cbranch_execnz .LBB2_8 ; CGP-NEXT: .LBB2_6: ; CGP-NEXT: s_or_b64 exec, exec, s[6:7] @@ -825,18 +750,13 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v16, v2, v10 ; CGP-NEXT: v_mul_hi_u32 v10, v4, v10 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v14, vcc +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v15, v11 +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], v11, v16, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v12, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v11, vcc, v11, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v10, vcc @@ -854,18 +774,13 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v14, v2, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v4, v3 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v13, vcc +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v11, v10 +; CGP-NEXT: v_addc_u32_e64 v5, s[6:7], v5, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v10, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v5, vcc, v5, v10, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v10 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc @@ -877,49 +792,44 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v12, v8, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v9, v3 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_mul_lo_u32 v5, v6, v2 -; CGP-NEXT: v_mul_lo_u32 v10, v7, v2 -; CGP-NEXT: v_mul_hi_u32 v11, v6, v2 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v11, v2 +; CGP-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v4, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v5, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v2, vcc, v2, v4, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v5, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_mul_lo_u32 v4, v6, v3 +; CGP-NEXT: v_mul_lo_u32 v4, v6, v2 +; CGP-NEXT: v_mul_lo_u32 v5, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v10, v6, v2 +; CGP-NEXT: v_mul_lo_u32 v11, v6, v3 ; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v2 ; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v12 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v12 ; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v9, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v9, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v6 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4 +; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v9, v5, vcc +; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v9, v5 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v7, vcc +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v5, vcc, v5, v7, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v7 -; CGP-NEXT: v_cndmask_b32_e32 v8, v11, v9, vcc -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v6 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v7 +; CGP-NEXT: v_cndmask_b32_e32 v8, v10, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v7 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; CGP-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; CGP-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v10, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v11, vcc ; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc @@ -927,7 +837,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr6 ; CGP-NEXT: ; implicit-def: $vgpr8 -; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CGP-NEXT: s_cbranch_execz .LBB2_6 ; CGP-NEXT: .LBB2_8: ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4 @@ -989,18 +899,13 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 ; CHECK-NEXT: v_mul_hi_u32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v7, v2 +; CHECK-NEXT: v_addc_u32_e64 v0, s[6:7], v2, v0, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v2, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v4, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc ; CHECK-NEXT: v_mul_hi_u32 v1, v1, v3 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 20 @@ -1030,33 +935,23 @@ define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) { ; CHECK-NEXT: v_mul_hi_u32 v2, v2, v5 ; CHECK-NEXT: v_mul_hi_u32 v3, v3, v5 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v14, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v13 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v8 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v9, v10 +; CHECK-NEXT: v_addc_u32_e64 v0, s[6:7], v5, v0, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v5, s[8:9], v11, v12 +; CHECK-NEXT: v_addc_u32_e64 v5, s[8:9], v5, v13, s[8:9] +; CHECK-NEXT: v_add_i32_e64 v4, s[10:11], v14, v4 +; CHECK-NEXT: v_addc_u32_e64 v2, s[12:13], v4, v2, s[10:11] +; CHECK-NEXT: v_addc_u32_e64 v4, s[14:15], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v5, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], 0, 0, s[8:9] +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], 0, 0, s[12:13] +; CHECK-NEXT: v_addc_u32_e64 v0, vcc, v0, v4, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v5, vcc +; CHECK-NEXT: v_addc_u32_e64 v2, vcc, v2, v6, s[10:11] +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 20 ; CHECK-NEXT: v_lshr_b64 v[2:3], v[2:3], 20 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1079,10 +974,10 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v5 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CHECK-NEXT: s_cbranch_execnz .LBB7_3 ; CHECK-NEXT: ; %bb.1: ; %Flow -; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CHECK-NEXT: s_cbranch_execnz .LBB7_4 ; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] @@ -1113,18 +1008,13 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v8 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v10, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v9, vcc, v9, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v8, vcc @@ -1142,18 +1032,13 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v8 +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v8, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v7, vcc, v7, v8, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc @@ -1165,57 +1050,52 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_mul_hi_u32 v10, v3, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, v5, v0 -; CHECK-NEXT: v_mul_lo_u32 v8, v6, v0 -; CHECK-NEXT: v_mul_hi_u32 v9, v5, v0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v7, vcc +; CHECK-NEXT: v_add_i32_e64 v0, s[4:5], v9, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v2, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v7, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; CHECK-NEXT: v_mul_lo_u32 v2, v5, v1 +; CHECK-NEXT: v_mul_lo_u32 v2, v5, v0 +; CHECK-NEXT: v_mul_lo_u32 v7, v6, v0 +; CHECK-NEXT: v_mul_hi_u32 v8, v5, v0 +; CHECK-NEXT: v_mul_lo_u32 v9, v5, v1 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v0 ; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v10 ; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v7 -; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v4, v2, vcc -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v4, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CHECK-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v5 -; CHECK-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v4, v7, vcc +; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v7 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v3, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v8, v7, vcc +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 +; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v8, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v3, v11, v12, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v4, v11, v12, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; CHECK-NEXT: ; implicit-def: $vgpr2 ; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6 ; CHECK-NEXT: ; implicit-def: $vgpr3 -; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CHECK-NEXT: s_cbranch_execz .LBB7_2 ; CHECK-NEXT: .LBB7_4: ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 @@ -1255,253 +1135,223 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v7 ; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v8 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v7 -; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v4 -; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v12, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v5 ; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], 0, v4 -; GISEL-NEXT: v_subb_u32_e32 v15, vcc, 0, v8, vcc -; GISEL-NEXT: v_subb_u32_e64 v12, vcc, 0, v5, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, 0, v8, vcc +; GISEL-NEXT: v_subb_u32_e64 v15, vcc, 0, v5, s[4:5] ; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 -; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14 +; GISEL-NEXT: v_mac_f32_e32 v12, 0x4f800000, v13 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v13 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v12 ; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11 -; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v10 -; GISEL-NEXT: v_mul_f32_e32 v14, 0x2f800000, v11 +; GISEL-NEXT: v_mul_f32_e32 v12, 0x2f800000, v10 +; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v11 +; GISEL-NEXT: v_trunc_f32_e32 v12, v12 ; GISEL-NEXT: v_trunc_f32_e32 v13, v13 -; GISEL-NEXT: v_trunc_f32_e32 v14, v14 -; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v13 +; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v12 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v12 +; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v13 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 -; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v14 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v14 ; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 -; GISEL-NEXT: v_mul_lo_u32 v16, v6, v13 +; GISEL-NEXT: v_mul_lo_u32 v16, v6, v12 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v14 -; GISEL-NEXT: v_mul_lo_u32 v18, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v19, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 -; GISEL-NEXT: v_mul_lo_u32 v19, v14, v18 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_mul_hi_u32 v20, v11, v18 -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v19, v20 -; GISEL-NEXT: v_mul_lo_u32 v19, v6, v10 -; GISEL-NEXT: v_mul_lo_u32 v20, v15, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v20, v16 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v19, v14, v10 ; GISEL-NEXT: v_mul_hi_u32 v20, v6, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16 +; GISEL-NEXT: v_mul_lo_u32 v19, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 +; GISEL-NEXT: v_mul_lo_u32 v20, v10, v16 +; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v20, v10, v18 +; GISEL-NEXT: v_addc_u32_e32 v19, vcc, v19, v20, vcc +; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v20, v15, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v20, v17 +; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v20 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v19 -; GISEL-NEXT: v_mul_lo_u32 v21, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_mul_hi_u32 v21, v10, v19 -; GISEL-NEXT: v_add_i32_e64 v20, s[8:9], v20, v21 +; GISEL-NEXT: v_mul_lo_u32 v21, v11, v17 +; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v20, v21 +; GISEL-NEXT: v_mul_hi_u32 v21, v11, v19 +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], v20, v21, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 ; GISEL-NEXT: v_mul_hi_u32 v19, v13, v19 -; GISEL-NEXT: v_mul_hi_u32 v18, v14, v18 -; GISEL-NEXT: v_mul_lo_u32 v20, v13, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[12:13], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v12, v16 +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v13, v17 +; GISEL-NEXT: v_add_i32_e64 v19, s[8:9], v20, v19 ; GISEL-NEXT: v_mul_hi_u32 v20, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[14:15], v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v16, v12, v16 +; GISEL-NEXT: v_addc_u32_e64 v18, s[10:11], v18, v20, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[16:17], v18, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[10:11] -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[14:15] -; GISEL-NEXT: v_add_i32_e64 v21, s[6:7], v21, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[16:17] -; GISEL-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v22 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v19 -; GISEL-NEXT: v_mul_hi_u32 v16, v13, v16 -; GISEL-NEXT: v_mul_hi_u32 v17, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v21, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v23, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v6, v10 -; GISEL-NEXT: v_mul_lo_u32 v15, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18 -; GISEL-NEXT: v_mul_hi_u32 v18, v6, v10 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 -; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v12, v12, v11 -; GISEL-NEXT: v_addc_u32_e64 v13, vcc, v13, v16, s[6:7] +; GISEL-NEXT: v_mul_hi_u32 v17, v13, v17 +; GISEL-NEXT: v_addc_u32_e64 v19, s[12:13], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, v18, v20, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v19, s[4:5], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[6:7], 0, 0, s[10:11] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v18 +; GISEL-NEXT: v_addc_u32_e64 v18, s[8:9], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v20, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, 0, v18, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v19 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v20 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v14, v14, v10 +; GISEL-NEXT: v_mul_hi_u32 v19, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v20, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v15, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v12, v16, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11 -; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v17, s[8:9] -; GISEL-NEXT: v_mul_hi_u32 v17, v10, v20 -; GISEL-NEXT: v_mul_lo_u32 v6, v6, v13 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v15, v6 -; GISEL-NEXT: v_mul_hi_u32 v15, v11, v19 -; GISEL-NEXT: v_mul_lo_u32 v9, v9, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9 -; GISEL-NEXT: v_mul_lo_u32 v12, v13, v20 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v17, v10, v18 +; GISEL-NEXT: v_mul_lo_u32 v6, v6, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v14, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v11, v20 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v12, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v19 +; GISEL-NEXT: v_mul_lo_u32 v19, v13, v20 ; GISEL-NEXT: v_mul_hi_u32 v20, v13, v20 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v14, v19 -; GISEL-NEXT: v_mul_hi_u32 v19, v14, v19 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16 ; GISEL-NEXT: v_mul_lo_u32 v16, v10, v6 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; GISEL-NEXT: v_mul_lo_u32 v16, v13, v6 -; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 -; GISEL-NEXT: v_mul_hi_u32 v12, v10, v6 -; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v6 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v15, v10, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v6 ; GISEL-NEXT: v_mul_lo_u32 v17, v11, v9 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17 -; GISEL-NEXT: v_mul_hi_u32 v18, v11, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v16, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[10:11] -; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v19, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v19 -; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17 -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v18 -; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v19 -; GISEL-NEXT: v_mul_lo_u32 v16, v1, v10 -; GISEL-NEXT: v_mul_hi_u32 v17, v0, v10 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; GISEL-NEXT: v_mul_hi_u32 v19, v11, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v18, v20 +; GISEL-NEXT: v_addc_u32_e64 v15, s[10:11], v16, v15, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v17, v14, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v14, s[12:13], v18, v19, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v16, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, v15, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v14, v18, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, vcc, 0, v19, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v16 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v17 +; GISEL-NEXT: v_mul_lo_u32 v14, v1, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v0, v10 ; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v19, v2, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v3, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v2, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v12 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v13, v6, vcc -; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v14, v9, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v12, v6, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v13, v9, s[4:5] ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v6 ; GISEL-NEXT: v_mul_lo_u32 v13, v1, v6 -; GISEL-NEXT: v_mul_hi_u32 v14, v0, v6 -; GISEL-NEXT: v_mul_hi_u32 v15, v1, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v19, v1, v6 ; GISEL-NEXT: v_mul_lo_u32 v6, v2, v9 ; GISEL-NEXT: v_mul_lo_u32 v20, v3, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12 -; GISEL-NEXT: v_mul_hi_u32 v16, v2, v9 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v9 ; GISEL-NEXT: v_mul_hi_u32 v21, v3, v9 ; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v13, v10 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v18, v6 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v16, v6 ; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v20, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v14 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v19 -; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v10, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v14 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v16 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v10, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v14, v7, v6 -; GISEL-NEXT: v_mul_lo_u32 v16, v8, v6 -; GISEL-NEXT: v_mul_hi_u32 v17, v7, v6 -; GISEL-NEXT: v_mul_lo_u32 v18, v4, v9 -; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v11, v4, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v6 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v14 -; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], 1, v9 -; GISEL-NEXT: v_sub_i32_e64 v2, s[8:9], v2, v18 -; GISEL-NEXT: v_add_i32_e64 v18, s[10:11], 1, v13 -; GISEL-NEXT: v_add_i32_e64 v10, s[12:13], v15, v10 -; GISEL-NEXT: v_add_i32_e64 v15, s[12:13], 1, v14 -; GISEL-NEXT: v_add_i32_e64 v12, s[14:15], v21, v12 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[14:15], v0, v7 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[16:17], v2, v4 -; GISEL-NEXT: v_sub_i32_e64 v0, s[18:19], v0, v7 -; GISEL-NEXT: v_sub_i32_e64 v2, s[20:21], v2, v4 -; GISEL-NEXT: v_mul_lo_u32 v20, v7, v10 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[22:23], v0, v7 -; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v10, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 -; GISEL-NEXT: v_mul_lo_u32 v2, v4, v12 -; GISEL-NEXT: v_add_i32_e64 v4, s[24:25], v16, v20 -; GISEL-NEXT: v_addc_u32_e64 v7, s[6:7], 0, v12, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], v19, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[14:15] -; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v4, v17 -; GISEL-NEXT: v_subb_u32_e64 v17, s[6:7], v1, v4, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v12, v15, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[10:11], v9, v18, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v6, s[6:7], v6, v17, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v10, s[12:13], v10, v14, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v6, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v6, vcc, v9, v6, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], v10, v12, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, vcc, 0, v13, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v12, v7, v6 +; GISEL-NEXT: v_mul_lo_u32 v13, v8, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v7, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v4, v9 +; GISEL-NEXT: v_mul_lo_u32 v16, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v17, v4, v9 +; GISEL-NEXT: v_add_i32_e32 v18, vcc, 1, v6 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v19, v10 +; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], 1, v9 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v21, v11 +; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], 1, v18 +; GISEL-NEXT: v_sub_i32_e64 v0, s[8:9], v0, v12 +; GISEL-NEXT: v_add_i32_e64 v12, s[10:11], 1, v19 +; GISEL-NEXT: v_sub_i32_e64 v2, s[12:13], v2, v15 +; GISEL-NEXT: v_mul_lo_u32 v15, v7, v10 +; GISEL-NEXT: v_add_i32_e64 v13, s[14:15], v13, v15 +; GISEL-NEXT: v_mul_lo_u32 v15, v4, v11 +; GISEL-NEXT: v_add_i32_e64 v15, s[14:15], v16, v15 +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v10, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 +; GISEL-NEXT: v_sub_i32_e64 v0, s[14:15], v0, v7 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[16:17], v0, v7 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], 0, v11, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 +; GISEL-NEXT: v_sub_i32_e64 v2, s[18:19], v2, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[20:21], v2, v4 +; GISEL-NEXT: v_addc_u32_e64 v2, s[6:7], 0, v16, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v13, v14 +; GISEL-NEXT: v_addc_u32_e64 v7, s[6:7], 0, v0, s[10:11] +; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v15, v17 +; GISEL-NEXT: v_subb_u32_e64 v14, s[6:7], v1, v4, s[8:9] ; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[16:17] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v17, v8 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[14:15], v17, v8 -; GISEL-NEXT: v_addc_u32_e64 v17, s[10:11], 0, v0, s[10:11] -; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v8, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[18:19] +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v14, v8 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v14, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v8, s[8:9] +; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[14:15] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v8 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[10:11], v1, v8 -; GISEL-NEXT: v_addc_u32_e64 v1, s[12:13], 0, v7, s[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[22:23] -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, -1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; GISEL-NEXT: v_subb_u32_e64 v11, vcc, v3, v2, s[8:9] -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v3, v2 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5 -; GISEL-NEXT: v_subb_u32_e64 v2, s[8:9], v2, v5, s[8:9] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v11, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; GISEL-NEXT: v_subbrev_u32_e64 v2, vcc, 0, v2, s[20:21] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v16, s[14:15] -; GISEL-NEXT: v_cndmask_b32_e64 v4, v11, v4, s[8:9] -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v2, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v1, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[16:17] +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[20:21] +; GISEL-NEXT: v_subb_u32_e64 v15, s[10:11], v3, v13, s[12:13] +; GISEL-NEXT: v_sub_i32_e64 v3, s[10:11], v3, v13 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[10:11], v15, v5 +; GISEL-NEXT: v_subb_u32_e64 v3, s[12:13], v3, v5, s[12:13] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[12:13], v15, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[10:11] +; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[18:19] +; GISEL-NEXT: v_cndmask_b32_e64 v4, v13, v4, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v13, v15, v14, s[12:13] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v3, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[10:11] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v19, s[6:7] -; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v2 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v8, s[6:7] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v1 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v2, v13, v18, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v14, v15, s[8:9] -; GISEL-NEXT: v_cndmask_b32_e64 v4, v0, v17, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v5, v7, v1, s[8:9] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, v18, v20, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v19, v12, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v4, v16, v2, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v5, v0, v7, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v3, s[4:5] ; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v4, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v12, v5, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v11, v5, s[4:5] ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_udiv_v2i64_pow2_shl_denom: @@ -1520,7 +1370,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_2 ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3 @@ -1548,18 +1398,13 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v19, v0, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v13 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v18, v14 -; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v17 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v17, vcc +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 +; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], v14, v19, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v15, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v16, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v14, vcc, v14, v15, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v16, vcc ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc @@ -1577,18 +1422,13 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v17, v0, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v4, v1 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v16, vcc +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v13 +; CGP-NEXT: v_addc_u32_e64 v12, s[6:7], v12, v17, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v13, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v12, vcc, v12, v13, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v14, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc @@ -1600,58 +1440,53 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v15, v8, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v9, v1 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v15 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; CGP-NEXT: v_mul_lo_u32 v12, v2, v0 -; CGP-NEXT: v_mul_lo_u32 v13, v3, v0 -; CGP-NEXT: v_mul_hi_u32 v14, v2, v0 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v12, vcc +; CGP-NEXT: v_add_i32_e64 v0, s[4:5], v14, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v15, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v4, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v0, vcc, v0, v4, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v12, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v4, v2, v1 +; CGP-NEXT: v_mul_lo_u32 v4, v2, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v3, v0 +; CGP-NEXT: v_mul_hi_u32 v13, v2, v0 +; CGP-NEXT: v_mul_lo_u32 v14, v2, v1 ; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v0 ; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v15 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v15 ; CGP-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v12 -; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v9, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v9, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v3 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] -; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v3, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v3 -; CGP-NEXT: v_cndmask_b32_e32 v9, v14, v9, vcc -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v2 -; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4 +; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v9, v12, vcc +; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v9, v12 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v3 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v3, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v3 +; CGP-NEXT: v_cndmask_b32_e32 v8, v13, v12, vcc +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v2 +; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v3 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v3 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v3 +; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v15, v13, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v15, v14, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v16, v17, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 ; CGP-NEXT: .LBB8_2: ; %Flow1 -; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[8:9] ; CGP-NEXT: v_lshl_b64 v[9:10], v[10:11], v6 ; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] ; CGP-NEXT: s_cbranch_execz .LBB8_4 @@ -1683,10 +1518,10 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v9 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execnz .LBB8_7 ; CGP-NEXT: ; %bb.5: ; %Flow -; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CGP-NEXT: s_cbranch_execnz .LBB8_8 ; CGP-NEXT: .LBB8_6: ; CGP-NEXT: s_or_b64 exec, exec, s[6:7] @@ -1717,18 +1552,13 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v16, v2, v8 ; CGP-NEXT: v_mul_hi_u32 v8, v4, v8 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v14, vcc +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v15, v11 +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], v11, v16, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v12, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v11, vcc, v11, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc @@ -1746,18 +1576,13 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v14, v2, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v4, v3 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v13, vcc +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v11, v8 +; CGP-NEXT: v_addc_u32_e64 v6, s[6:7], v6, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v8, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v6, vcc, v6, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v11, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc @@ -1769,57 +1594,52 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v12, v5, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v7, v3 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CGP-NEXT: v_mul_lo_u32 v6, v9, v2 -; CGP-NEXT: v_mul_lo_u32 v8, v10, v2 -; CGP-NEXT: v_mul_hi_u32 v11, v9, v2 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v11, v2 +; CGP-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v4, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v2, vcc, v2, v4, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v6, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_mul_lo_u32 v4, v9, v3 +; CGP-NEXT: v_mul_lo_u32 v4, v9, v2 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v2 +; CGP-NEXT: v_mul_hi_u32 v8, v9, v2 +; CGP-NEXT: v_mul_lo_u32 v11, v9, v3 ; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v2 ; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v12 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 +; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v12 ; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v6 -; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v7, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v7, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v9 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v5, v4 +; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v7, v6, vcc +; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v7, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v9 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v10, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v10 -; CGP-NEXT: v_cndmask_b32_e32 v6, v11, v7, vcc -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v9 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v10 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v10 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v6, vcc, v6, v10, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v5, v10 +; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v7, vcc +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v9 +; CGP-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v9 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v10 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v10 -; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v5, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v10 +; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v8, vcc -; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v11, vcc +; CGP-NEXT: v_cndmask_b32_e32 v6, v13, v14, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; CGP-NEXT: ; implicit-def: $vgpr5 -; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[8:9] ; CGP-NEXT: s_cbranch_execz .LBB8_6 ; CGP-NEXT: .LBB8_8: ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4 @@ -1958,39 +1778,29 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_hi_u32 v25, v7, v13 ; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v23 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v24, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v25 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v20 -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v18 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v24, v21 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v20, vcc +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v19, v14 +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v14, v22, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[8:9], v18, v23 +; GISEL-NEXT: v_addc_u32_e64 v15, s[8:9], v15, v21, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v15, s[10:11], v24, v17 +; GISEL-NEXT: v_addc_u32_e64 v15, s[12:13], v15, v25, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v16, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, v15, v18, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v19, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc ; GISEL-NEXT: v_mul_lo_u32 v12, v4, v6 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v6 ; GISEL-NEXT: v_mul_hi_u32 v14, v4, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v15 ; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v7 ; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7 @@ -2016,39 +1826,29 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_hi_u32 v21, v7, v5 ; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v20, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v15 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v17 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v17, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v10, v12 +; GISEL-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v18, v15 +; GISEL-NEXT: v_addc_u32_e64 v10, s[8:9], v10, v19, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v10, s[10:11], v20, v13 +; GISEL-NEXT: v_addc_u32_e64 v10, s[12:13], v10, v21, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v12, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v15, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v9, v12, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, vcc, v10, v14, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v15, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 ; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v8, v4, vcc ; GISEL-NEXT: v_mul_lo_u32 v8, 0, v6 ; GISEL-NEXT: v_mul_hi_u32 v9, v3, v6 ; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc ; GISEL-NEXT: v_mul_lo_u32 v10, 0, v7 ; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7 @@ -2066,78 +1866,76 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6 -; GISEL-NEXT: v_mul_lo_u32 v12, 0, v6 -; GISEL-NEXT: v_mul_hi_u32 v13, v1, v6 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_lo_u32 v11, v0, v7 -; GISEL-NEXT: v_mul_lo_u32 v14, 0, v7 -; GISEL-NEXT: v_mul_hi_u32 v15, v0, v7 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v1, v4 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v6 +; GISEL-NEXT: v_mul_lo_u32 v8, v1, v6 +; GISEL-NEXT: v_mul_lo_u32 v10, 0, v6 +; GISEL-NEXT: v_mul_hi_u32 v11, v1, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v12, 0, v7 +; GISEL-NEXT: v_mul_hi_u32 v13, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v14, v1, v4 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, 1, v6 ; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v4, vcc ; GISEL-NEXT: v_mul_lo_u32 v17, v0, v5 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, 1, v7 ; GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v5, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v10 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v15 ; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v16, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, 1, v18 ; GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v19, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v13 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v15 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v9 -; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], 0, v8, vcc -; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 0, v8 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v13 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v8 +; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], 0, v10, vcc +; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 0, v10 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v11 -; GISEL-NEXT: v_subb_u32_e64 v11, s[6:7], 0, v13, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v13, s[6:7], 0, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v9 +; GISEL-NEXT: v_subb_u32_e64 v9, s[6:7], 0, v11, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v11, s[6:7], 0, v11 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[6:7] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, -1, v14, s[6:7] -; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11 -; GISEL-NEXT: v_cndmask_b32_e32 v11, -1, v15, vcc -; GISEL-NEXT: v_subbrev_u32_e64 v13, vcc, 0, v13, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, -1, v12, s[6:7] +; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v10, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v9, -1, v13, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v11, vcc, 0, v11, s[4:5] ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v1 -; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc +; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v10, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v0 -; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v13, vcc +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v11, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 ; GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v12, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v15, v14, vcc ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v18, v17, s[4:5] ; GISEL-NEXT: v_cndmask_b32_e32 v3, v16, v20, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v6, v19, v21, s[4:5] -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v2, s[4:5] ; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v6, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll index a58397e..e5dbe60 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll @@ -116,8 +116,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s11 ; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s10 -; GFX8-NEXT: s_sub_u32 s2, 0, s10 -; GFX8-NEXT: s_subb_u32 s3, 0, s11 +; GFX8-NEXT: s_sub_u32 s14, 0, s10 +; GFX8-NEXT: s_subb_u32 s15, 0, s11 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -128,10 +128,10 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -139,44 +139,34 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; GFX8-NEXT: v_add_u32_e64 v0, s[0:1], v7, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX8-NEXT: v_addc_u32_e64 v2, s[12:13], 0, 0, vcc +; GFX8-NEXT: v_addc_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 -; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc @@ -184,28 +174,23 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_mul_lo_u32 v3, s8, v1 ; GFX8-NEXT: v_mul_hi_u32 v4, s8, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX8-NEXT: v_mul_hi_u32 v5, s9, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v4, s9, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v0, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v4, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v3, v[1:2] ; GFX8-NEXT: v_mov_b32_e32 v6, s9 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, s9, v1 +; GFX8-NEXT: v_mul_hi_u32 v4, s8, v1 +; GFX8-NEXT: v_addc_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v0, v3, vcc +; GFX8-NEXT: v_mul_hi_u32 v4, s9, v1 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v3, 0 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v2 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v4, v[1:2] ; GFX8-NEXT: v_mov_b32_e32 v5, s11 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v3, v[1:2] ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s8, v0 ; GFX8-NEXT: v_subb_u32_e64 v6, s[0:1], v6, v1, vcc ; GFX8-NEXT: v_sub_u32_e64 v0, s[0:1], s9, v1 @@ -218,8 +203,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[0:1] ; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s10, v2 ; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v0, vcc -; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v4 -; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1] +; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v3 +; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v4, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v7 @@ -235,8 +220,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v12, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v9, v10, v13, vcc ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v1 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v9, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v1, v4, v9, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v14, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] @@ -255,8 +240,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s11 ; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s10 -; GFX9-NEXT: s_sub_u32 s2, 0, s10 -; GFX9-NEXT: s_subb_u32 s3, 0, s11 +; GFX9-NEXT: s_sub_u32 s14, 0, s10 +; GFX9-NEXT: s_subb_u32 s15, 0, s11 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -267,10 +252,10 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -278,71 +263,59 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 -; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc +; GFX9-NEXT: v_add_co_u32_e64 v0, s[0:1], v7, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[12:13], 0, 0, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX9-NEXT: v_mov_b32_e32 v7, s11 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 +; GFX9-NEXT: v_mov_b32_e32 v7, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 -; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v5, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, s9, v0 ; GFX9-NEXT: v_mul_lo_u32 v3, s8, v1 ; GFX9-NEXT: v_mul_hi_u32 v4, s8, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX9-NEXT: v_mul_hi_u32 v6, s9, v1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, s9, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 -; GFX9-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v5, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v3, v[1:2] ; GFX9-NEXT: v_mov_b32_e32 v6, s9 -; GFX9-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v5, v[1:2] +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s9, v1 +; GFX9-NEXT: v_mul_hi_u32 v4, s8, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v0, v3, vcc +; GFX9-NEXT: v_mul_hi_u32 v4, s9, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v3, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v4, v4, v2 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v4, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v5, s11 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v3, v[1:2] ; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s8, v0 ; GFX9-NEXT: v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v6 @@ -351,19 +324,19 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v6 -; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v7, vcc +; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v5, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[0:1] ; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s10, v2 ; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v0, vcc -; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v5 -; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v3 +; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v4, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v9 ; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v8 -; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v7, vcc +; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v5, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v9 -; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s10, v8 +; GFX9-NEXT: v_subrev_co_u32_e32 v5, vcc, s10, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[0:1] ; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v10 ; GFX9-NEXT: v_subbrev_co_u32_e32 v15, vcc, 0, v0, vcc @@ -372,14 +345,14 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v13, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v10, v11, v14, vcc ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v10, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v7, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v5, v9, v15, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v1, v4, v10, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v15, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v3, v6, v5, s[0:1] -; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] -; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] +; GFX9-NEXT: v_cndmask_b32_e64 v3, v6, v4, s[0:1] +; GFX9-NEXT: global_store_dwordx2 v7, v[0:1], s[4:5] +; GFX9-NEXT: global_store_dwordx2 v7, v[2:3], s[6:7] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: udivrem_i64: @@ -388,7 +361,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s11 ; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s10 -; GFX10-NEXT: s_sub_u32 s0, 0, s10 +; GFX10-NEXT: s_sub_u32 s2, 0, s10 +; GFX10-NEXT: s_subb_u32 s3, 0, s11 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -399,82 +373,69 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v2 ; GFX10-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, s0, v3, 0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s1, s0, v4, v[1:2] -; GFX10-NEXT: s_subb_u32 s1, 0, s11 -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s2, s1, v3, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s2, v4, v[1:2] +; GFX10-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s3, v3, v[1:2] ; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0 -; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1 ; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX10-NEXT: v_add_co_u32 v2, s2, v2, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s2 -; GFX10-NEXT: v_add_co_u32 v6, s2, v7, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s2 -; GFX10-NEXT: v_add_co_u32 v0, s2, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 -; GFX10-NEXT: v_add_co_u32 v2, s2, v6, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s2 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 -; GFX10-NEXT: v_add_co_u32 v0, s2, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v5 +; GFX10-NEXT: v_add_co_u32 v0, s0, v7, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v6, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v8, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s12, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v5, vcc_lo ; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v3, v0 -; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v4, v1, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s0, v3, 0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s0, v4, v[1:2] -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s1, v3, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s2, v4, v[1:2] +; GFX10-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s3, v3, v[1:2] ; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0 -; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1 ; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v6, s0, v7, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v0, s0, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v2, s0, v6, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 -; GFX10-NEXT: v_add_co_u32 v0, s0, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v5 +; GFX10-NEXT: v_add_co_u32 v0, s0, v7, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v6, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v8, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v5, vcc_lo ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v3, v0 -; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX10-NEXT: v_mul_lo_u32 v2, s9, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v4, v1, vcc_lo ; GFX10-NEXT: v_mul_hi_u32 v4, s8, v0 ; GFX10-NEXT: v_mul_hi_u32 v0, s9, v0 ; GFX10-NEXT: v_mul_lo_u32 v3, s8, v1 ; GFX10-NEXT: v_mul_lo_u32 v5, s9, v1 -; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v3 -; GFX10-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX10-NEXT: v_mul_hi_u32 v6, s8, v1 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v3 ; GFX10-NEXT: v_add_co_u32 v0, s0, v5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v2, v6, v2 -; GFX10-NEXT: v_add_co_u32 v0, s0, v0, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v5, s0, v0, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v4, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v6, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, vcc_lo, v0, v2, s0 ; GFX10-NEXT: v_mul_hi_u32 v2, s9, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v5, 0 -; GFX10-NEXT: v_add3_u32 v3, v3, v6, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v4, 0 +; GFX10-NEXT: v_add_nc_u32_e32 v3, v2, v3 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s10, v3, v[1:2] -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s11, v5, v[1:2] -; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v5, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v3, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s11, v4, v[1:2] +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v4, 1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_sub_co_u32 v7, vcc_lo, s8, v0 ; GFX10-NEXT: v_sub_nc_u32_e32 v6, s9, v1 ; GFX10-NEXT: v_sub_co_ci_u32_e64 v8, s0, s9, v1, vcc_lo @@ -491,7 +452,7 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: v_cmp_le_u32_e64 s0, s11, v9 ; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0 ; GFX10-NEXT: v_add_co_u32 v13, s0, v2, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v4, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v5, s0 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s11, v9 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v12, v11, s0 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s11, v8 @@ -500,13 +461,13 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) ; GFX10-NEXT: v_sub_co_u32 v10, s0, v6, s10 ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v0, s0, 0, v0, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v13, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v14, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 ; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v0, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v10, 0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v5, v2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, v4, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, v5, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v7, v6, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v8, v9, s0 ; GFX10-NEXT: global_store_dwordx2 v10, v[0:1], s[4:5] @@ -980,12 +941,11 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-LABEL: udivrem_v2i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x20 -; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s13 ; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s12 -; GFX8-NEXT: s_sub_u32 s2, 0, s12 -; GFX8-NEXT: s_subb_u32 s3, 0, s13 +; GFX8-NEXT: s_sub_u32 s8, 0, s12 +; GFX8-NEXT: s_subb_u32 s9, 0, s13 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -996,10 +956,10 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2] ; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -1007,257 +967,228 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; GFX8-NEXT: v_add_u32_e64 v0, s[0:1], v7, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX8-NEXT: v_addc_u32_e64 v2, s[6:7], 0, 0, vcc +; GFX8-NEXT: v_addc_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] -; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 ; GFX8-NEXT: s_sub_u32 s2, 0, s14 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX8-NEXT: s_subb_u32 s3, 0, s15 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2] +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] ; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 -; GFX8-NEXT: s_subb_u32 s3, 0, s15 +; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 -; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 ; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mul_lo_u32 v2, s9, v0 ; GFX8-NEXT: v_mul_lo_u32 v3, s8, v1 ; GFX8-NEXT: v_mul_hi_u32 v4, s8, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, s9, v0 +; GFX8-NEXT: v_mov_b32_e32 v5, s13 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v4, s9, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v0, v2 -; GFX8-NEXT: v_mul_hi_u32 v4, s9, v1 -; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v6, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v4, v2 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v7, v[1:2] -; GFX8-NEXT: v_mov_b32_e32 v3, s9 -; GFX8-NEXT: v_sub_u32_e32 v8, vcc, s8, v0 -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v6, v[1:2] -; GFX8-NEXT: v_mov_b32_e32 v4, s13 -; GFX8-NEXT: v_subb_u32_e64 v0, s[0:1], v3, v1, vcc -; GFX8-NEXT: v_sub_u32_e64 v1, s[0:1], s9, v1 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, s9, v1 +; GFX8-NEXT: v_mul_hi_u32 v4, s8, v1 +; GFX8-NEXT: v_addc_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 +; GFX8-NEXT: v_addc_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v0, v3, vcc +; GFX8-NEXT: v_mul_hi_u32 v3, s9, v1 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v7, 0 +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; GFX8-NEXT: v_add_u32_e32 v8, vcc, v3, v2 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v8, v[1:2] +; GFX8-NEXT: v_mov_b32_e32 v4, s9 +; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s13, v7, v[1:2] +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s8, v0 +; GFX8-NEXT: v_subb_u32_e64 v0, s[0:1], v4, v2, vcc +; GFX8-NEXT: v_sub_u32_e64 v2, s[0:1], s9, v2 ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v1 +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v9, v2, v3, s[0:1] -; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s15 -; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v1, v4, vcc -; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s14 -; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 -; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, s12, v8 -; GFX8-NEXT: v_add_f32_e32 v1, v2, v1 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX8-NEXT: v_subbrev_u32_e64 v11, s[0:1], 0, v5, vcc -; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v6 -; GFX8-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 -; GFX8-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 -; GFX8-NEXT: v_trunc_f32_e32 v14, v2 -; GFX8-NEXT: v_mul_f32_e32 v2, 0xcf800000, v14 -; GFX8-NEXT: v_add_f32_e32 v1, v2, v1 -; GFX8-NEXT: v_cvt_u32_f32_e32 v15, v1 -; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v7, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v9, v3, v4, s[0:1] +; GFX8-NEXT: v_cvt_f32_u32_e32 v3, s15 +; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v2, v5, vcc +; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s14 +; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, s12, v1 +; GFX8-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GFX8-NEXT: v_subbrev_u32_e64 v11, s[0:1], 0, v6, vcc +; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v7 +; GFX8-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; GFX8-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; GFX8-NEXT: v_trunc_f32_e32 v14, v3 +; GFX8-NEXT: v_mul_f32_e32 v3, 0xcf800000, v14 +; GFX8-NEXT: v_add_f32_e32 v2, v3, v2 +; GFX8-NEXT: v_cvt_u32_f32_e32 v15, v2 +; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v8, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v10 ; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] -; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v15, 0 +; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v15, 0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v14, v14 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v16, v3, v16, s[0:1] -; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v14, v[2:3] +; GFX8-NEXT: v_cndmask_b32_e64 v16, v4, v16, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v14, v[3:4] ; GFX8-NEXT: v_add_u32_e64 v17, s[0:1], 1, v12 ; GFX8-NEXT: v_addc_u32_e64 v18, s[0:1], 0, v13, s[0:1] -; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s3, v15, v[2:3] -; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v5, v4, vcc -; GFX8-NEXT: v_mul_lo_u32 v4, v14, v1 -; GFX8-NEXT: v_mul_lo_u32 v5, v15, v2 +; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s3, v15, v[3:4] +; GFX8-NEXT: v_subb_u32_e32 v4, vcc, v6, v5, vcc +; GFX8-NEXT: v_mul_lo_u32 v5, v14, v2 +; GFX8-NEXT: v_mul_lo_u32 v6, v15, v3 ; GFX8-NEXT: v_subrev_u32_e32 v19, vcc, s12, v10 -; GFX8-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v3, vcc -; GFX8-NEXT: v_mul_hi_u32 v3, v15, v1 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v4, v14, v2 -; GFX8-NEXT: v_mul_hi_u32 v1, v14, v1 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_mul_hi_u32 v5, v15, v2 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v4, v1 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5 +; GFX8-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v4, vcc +; GFX8-NEXT: v_mul_hi_u32 v4, v15, v2 +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v6 ; GFX8-NEXT: v_mul_hi_u32 v2, v14, v2 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_add_u32_e32 v15, vcc, v15, v1 -; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v15, 0 -; GFX8-NEXT: v_addc_u32_e32 v14, vcc, v14, v2, vcc +; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v5, v4, vcc +; GFX8-NEXT: v_mul_lo_u32 v4, v14, v3 +; GFX8-NEXT: v_mul_hi_u32 v6, v15, v3 +; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v4, v2 +; GFX8-NEXT: v_mul_hi_u32 v3, v14, v3 +; GFX8-NEXT: v_addc_u32_e64 v2, s[0:1], v2, v6, vcc +; GFX8-NEXT: v_addc_u32_e64 v4, s[0:1], 0, 0, s[0:1] +; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; GFX8-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4 +; GFX8-NEXT: v_add_u32_e32 v15, vcc, v15, v2 +; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s2, v15, 0 +; GFX8-NEXT: v_addc_u32_e32 v14, vcc, v14, v3, vcc ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GFX8-NEXT: v_cndmask_b32_e32 v2, v12, v17, vcc -; GFX8-NEXT: v_mov_b32_e32 v1, v4 -; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s2, v14, v[1:2] +; GFX8-NEXT: v_cndmask_b32_e32 v3, v12, v17, vcc +; GFX8-NEXT: v_mov_b32_e32 v2, v5 +; GFX8-NEXT: v_mad_u64_u32 v[5:6], s[0:1], s2, v14, v[2:3] ; GFX8-NEXT: v_cndmask_b32_e32 v12, v13, v18, vcc ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9 -; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[2:3], s3, v15, v[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v6, v2, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v2, v7, v12, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e32 v5, v10, v19, vcc -; GFX8-NEXT: v_mul_lo_u32 v7, v14, v3 -; GFX8-NEXT: v_mul_lo_u32 v9, v15, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[0:1] -; GFX8-NEXT: v_mul_hi_u32 v8, v15, v3 -; GFX8-NEXT: v_cndmask_b32_e32 v6, v11, v20, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GFX8-NEXT: v_mad_u64_u32 v[5:6], s[2:3], s3, v15, v[5:6] +; GFX8-NEXT: v_cndmask_b32_e64 v2, v7, v3, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v8, v12, s[0:1] +; GFX8-NEXT: v_mul_lo_u32 v7, v14, v4 +; GFX8-NEXT: v_mul_lo_u32 v8, v15, v5 +; GFX8-NEXT: v_cndmask_b32_e32 v6, v10, v19, vcc +; GFX8-NEXT: v_mul_hi_u32 v10, v15, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v9, v11, v20, vcc ; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v8, v14, v4 -; GFX8-NEXT: v_mul_hi_u32 v3, v14, v3 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 -; GFX8-NEXT: v_mul_hi_u32 v9, v15, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v8, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v9 +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v10, vcc +; GFX8-NEXT: v_mul_lo_u32 v7, v14, v5 ; GFX8-NEXT: v_mul_hi_u32 v4, v14, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v7 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v15, v3 -; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v14, v4, vcc -; GFX8-NEXT: v_mul_lo_u32 v7, s11, v3 -; GFX8-NEXT: v_mul_lo_u32 v8, s10, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v6, v0, v6, s[0:1] -; GFX8-NEXT: v_mul_hi_u32 v0, s10, v3 -; GFX8-NEXT: v_mul_hi_u32 v3, s11, v3 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8-NEXT: v_mul_hi_u32 v10, v15, v5 +; GFX8-NEXT: v_addc_u32_e64 v8, s[2:3], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4 +; GFX8-NEXT: v_mul_hi_u32 v5, v14, v5 +; GFX8-NEXT: v_addc_u32_e64 v4, s[2:3], v4, v10, vcc +; GFX8-NEXT: v_addc_u32_e64 v7, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v7 +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v15, v4 +; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v14, v5, vcc ; GFX8-NEXT: v_mul_lo_u32 v7, s11, v4 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v8, v0 -; GFX8-NEXT: v_mul_hi_u32 v8, s10, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v7, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GFX8-NEXT: v_mul_lo_u32 v8, s10, v5 +; GFX8-NEXT: v_cndmask_b32_e64 v6, v1, v6, s[0:1] +; GFX8-NEXT: v_mul_hi_u32 v1, s10, v4 +; GFX8-NEXT: v_mul_hi_u32 v4, s11, v4 ; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: v_add_u32_e32 v9, vcc, v3, v0 -; GFX8-NEXT: v_mul_hi_u32 v8, s11, v4 -; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s14, v9, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 -; GFX8-NEXT: v_add_u32_e32 v10, vcc, v8, v0 -; GFX8-NEXT: v_mov_b32_e32 v0, v4 -; GFX8-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s14, v10, v[0:1] -; GFX8-NEXT: v_mov_b32_e32 v4, s11 -; GFX8-NEXT: v_mov_b32_e32 v0, s15 -; GFX8-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s15, v9, v[7:8] -; GFX8-NEXT: v_sub_u32_e32 v8, vcc, s10, v3 -; GFX8-NEXT: v_subb_u32_e64 v11, s[0:1], v4, v7, vcc -; GFX8-NEXT: v_sub_u32_e64 v3, s[0:1], s11, v7 -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v11 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc +; GFX8-NEXT: v_mul_lo_u32 v1, s11, v5 +; GFX8-NEXT: v_mul_hi_u32 v8, s10, v5 +; GFX8-NEXT: v_addc_u32_e64 v7, s[2:3], 0, 0, vcc +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v4 +; GFX8-NEXT: v_addc_u32_e64 v1, s[2:3], v1, v8, vcc +; GFX8-NEXT: v_addc_u32_e64 v4, s[2:3], 0, 0, s[2:3] +; GFX8-NEXT: v_addc_u32_e32 v8, vcc, v1, v7, vcc +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v4, vcc +; GFX8-NEXT: v_mul_hi_u32 v10, s11, v5 +; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[2:3], s14, v8, 0 +; GFX8-NEXT: v_cndmask_b32_e64 v7, v0, v9, s[0:1] +; GFX8-NEXT: v_add_u32_e32 v9, vcc, v10, v1 +; GFX8-NEXT: v_mov_b32_e32 v0, v5 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v9, v[0:1] +; GFX8-NEXT: v_mov_b32_e32 v10, s11 +; GFX8-NEXT: v_mov_b32_e32 v5, s15 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s15, v8, v[0:1] +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s10, v4 +; GFX8-NEXT: v_subb_u32_e64 v10, s[0:1], v10, v0, vcc +; GFX8-NEXT: v_sub_u32_e64 v0, s[0:1], s11, v0 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v10 ; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v11 -; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v3, v0, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[0:1] -; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s14, v8 -; GFX8-NEXT: v_subbrev_u32_e64 v12, s[0:1], 0, v3, vcc -; GFX8-NEXT: v_add_u32_e64 v13, s[0:1], 1, v9 -; GFX8-NEXT: v_addc_u32_e64 v14, s[0:1], 0, v10, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v1 +; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v10 +; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v11, s[0:1] +; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, s14, v1 +; GFX8-NEXT: v_subbrev_u32_e64 v12, s[0:1], 0, v0, vcc +; GFX8-NEXT: v_add_u32_e64 v13, s[0:1], 1, v8 +; GFX8-NEXT: v_addc_u32_e64 v14, s[0:1], 0, v9, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v12 ; GFX8-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v7 -; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v3, v0, vcc +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v11 +; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v12 -; GFX8-NEXT: v_subrev_u32_e32 v18, vcc, s14, v7 +; GFX8-NEXT: v_subrev_u32_e32 v18, vcc, s14, v11 ; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v16, s[0:1] ; GFX8-NEXT: v_add_u32_e64 v16, s[0:1], 1, v13 ; GFX8-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc ; GFX8-NEXT: v_addc_u32_e64 v17, s[0:1], 0, v14, s[0:1] ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; GFX8-NEXT: v_cndmask_b32_e32 v3, v13, v16, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v13, v14, v17, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v5, v13, v16, vcc ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, v13, s[0:1] -; GFX8-NEXT: v_mov_b32_e32 v10, s5 -; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v18, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v13, v14, v17, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v5, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e32 v8, v11, v18, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc -; GFX8-NEXT: v_mov_b32_e32 v9, s4 -; GFX8-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v8, v11, v0, s[0:1] -; GFX8-NEXT: flat_store_dwordx4 v[9:10], v[1:4] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v13, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v8, v1, v8, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v9, v10, v0, s[0:1] +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; GFX8-NEXT: v_mov_b32_e32 v0, s6 ; GFX8-NEXT: v_mov_b32_e32 v1, s7 -; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[5:8] +; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[6:9] ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: udivrem_v2i64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x20 +; GFX9-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x20 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s13 -; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s12 -; GFX9-NEXT: s_sub_u32 s2, 0, s12 -; GFX9-NEXT: s_subb_u32 s3, 0, s13 +; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s17 +; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s16 +; GFX9-NEXT: s_sub_u32 s8, 0, s16 +; GFX9-NEXT: s_subb_u32 s9, 0, s17 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v2, v1 @@ -1265,10 +1196,10 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 @@ -1276,220 +1207,195 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 -; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc +; GFX9-NEXT: v_add_co_u32_e64 v0, s[0:1], v7, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[2:3], v0, v8, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[6:7], 0, 0, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v2, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0 +; GFX9-NEXT: s_sub_u32 s2, 0, s18 +; GFX9-NEXT: s_subb_u32 s3, 0, s19 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2] ; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 -; GFX9-NEXT: s_sub_u32 s2, 0, s14 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2] ; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 ; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 -; GFX9-NEXT: s_subb_u32 s3, 0, s15 +; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 -; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, v3, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 ; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v6, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v5, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mul_lo_u32 v2, s9, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, s8, v1 -; GFX9-NEXT: v_mul_hi_u32 v4, s8, v0 -; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX9-NEXT: v_mul_hi_u32 v5, s9, v1 +; GFX9-NEXT: v_mul_lo_u32 v2, s13, v0 +; GFX9-NEXT: v_mul_lo_u32 v3, s12, v1 +; GFX9-NEXT: v_mul_hi_u32 v4, s12, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, s13, v0 +; GFX9-NEXT: v_mov_b32_e32 v5, s13 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, s9, v1 -; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 -; GFX9-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX9-NEXT: v_mov_b32_e32 v6, s13 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v0, v2 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v8, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_add3_u32 v9, v3, v0, v5 -; GFX9-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s12, v9, v[0:1] -; GFX9-NEXT: v_mov_b32_e32 v5, s9 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s13, v8, v[2:3] -; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s8, v1 -; GFX9-NEXT: v_subb_co_u32_e64 v1, s[0:1], v5, v3, vcc -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s13, v1 +; GFX9-NEXT: v_mul_hi_u32 v4, s12, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], v0, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v0, v3, vcc +; GFX9-NEXT: v_mul_hi_u32 v3, s13, v1 +; GFX9-NEXT: v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v8, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_u32_e32 v9, v3, v2 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v9, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v6, s17 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s17, v8, v[1:2] +; GFX9-NEXT: v_sub_co_u32_e32 v1, vcc, s12, v0 +; GFX9-NEXT: v_subb_co_u32_e64 v2, s[0:1], v5, v3, vcc +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s17, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v2 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s16, v1 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v1 -; GFX9-NEXT: v_sub_u32_e32 v3, s9, v3 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s17, v2 +; GFX9-NEXT: v_sub_u32_e32 v3, s13, v3 ; GFX9-NEXT: v_cndmask_b32_e64 v10, v4, v5, s[0:1] -; GFX9-NEXT: v_cvt_f32_u32_e32 v4, s15 +; GFX9-NEXT: v_cvt_f32_u32_e32 v4, s19 ; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v3, v6, vcc -; GFX9-NEXT: v_cvt_f32_u32_e32 v3, s14 +; GFX9-NEXT: v_cvt_f32_u32_e32 v3, s18 ; GFX9-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; GFX9-NEXT: v_subrev_co_u32_e32 v11, vcc, s12, v2 +; GFX9-NEXT: v_subrev_co_u32_e32 v11, vcc, s16, v1 ; GFX9-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 ; GFX9-NEXT: v_subbrev_co_u32_e64 v12, s[0:1], 0, v7, vcc ; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v8 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 ; GFX9-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 -; GFX9-NEXT: v_trunc_f32_e32 v15, v4 -; GFX9-NEXT: v_mul_f32_e32 v4, 0xcf800000, v15 +; GFX9-NEXT: v_trunc_f32_e32 v5, v4 +; GFX9-NEXT: v_mul_f32_e32 v4, 0xcf800000, v5 ; GFX9-NEXT: v_add_f32_e32 v3, v4, v3 -; GFX9-NEXT: v_cvt_u32_f32_e32 v16, v3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v15, v3 ; GFX9-NEXT: v_addc_co_u32_e64 v14, s[0:1], 0, v9, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v12 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[0:1] -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v16, 0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v15, v15 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v12 -; GFX9-NEXT: v_cndmask_b32_e64 v17, v5, v17, s[0:1] -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s2, v15, v[4:5] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s17, v12 +; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v15, 0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v17, v5 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s16, v11 +; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, -1, s[0:1] +; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v6, vcc +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s2, v17, v[4:5] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s17, v12 +; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v18, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s3, v15, v[4:5] +; GFX9-NEXT: v_mul_lo_u32 v5, v17, v3 +; GFX9-NEXT: v_mul_hi_u32 v6, v15, v3 +; GFX9-NEXT: v_mul_lo_u32 v20, v15, v4 +; GFX9-NEXT: v_mul_hi_u32 v3, v17, v3 ; GFX9-NEXT: v_add_co_u32_e64 v18, s[0:1], 1, v13 +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v20 +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v5, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v5, v17, v4 +; GFX9-NEXT: v_mul_hi_u32 v20, v15, v4 ; GFX9-NEXT: v_addc_co_u32_e64 v19, s[0:1], 0, v14, s[0:1] -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s3, v16, v[4:5] -; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v6, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v15, v3 -; GFX9-NEXT: v_mul_lo_u32 v7, v16, v4 -; GFX9-NEXT: v_subrev_co_u32_e32 v20, vcc, s12, v11 -; GFX9-NEXT: v_subbrev_co_u32_e32 v21, vcc, 0, v5, vcc -; GFX9-NEXT: v_mul_hi_u32 v5, v16, v3 -; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v6, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v15, v4 -; GFX9-NEXT: v_mul_hi_u32 v3, v15, v3 -; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 -; GFX9-NEXT: v_mul_hi_u32 v7, v16, v4 -; GFX9-NEXT: v_mul_hi_u32 v4, v15, v4 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v6, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v5 -; GFX9-NEXT: v_add_u32_e32 v6, v6, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v16, vcc, v16, v3 -; GFX9-NEXT: v_add3_u32 v4, v6, v5, v4 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[0:1], s2, v16, 0 -; GFX9-NEXT: v_addc_co_u32_e32 v15, vcc, v15, v4, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v13, v18, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v5, v3 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], v3, v20, vcc +; GFX9-NEXT: v_mul_hi_u32 v4, v17, v4 +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[0:1], 0, 0, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v6, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v15, vcc, v15, v3 +; GFX9-NEXT: v_add_u32_e32 v4, v4, v5 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[0:1], s2, v15, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v17, vcc, v17, v4, vcc ; GFX9-NEXT: v_mov_b32_e32 v3, v6 -; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s2, v15, v[3:4] -; GFX9-NEXT: v_cndmask_b32_e32 v13, v14, v19, vcc +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v17, v[3:4] +; GFX9-NEXT: v_subrev_co_u32_e32 v20, vcc, s16, v11 +; GFX9-NEXT: v_subbrev_co_u32_e32 v21, vcc, 0, v7, vcc +; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s3, v15, v[3:4] +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 +; GFX9-NEXT: v_cndmask_b32_e32 v13, v13, v18, vcc ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v10 -; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[2:3], s3, v16, v[6:7] -; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v4, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v4, v9, v13, s[0:1] -; GFX9-NEXT: v_mul_lo_u32 v8, v15, v5 -; GFX9-NEXT: v_mul_lo_u32 v9, v16, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v7, v11, v20, vcc -; GFX9-NEXT: v_mul_hi_u32 v11, v16, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v13, s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v7, v17, v5 +; GFX9-NEXT: v_mul_lo_u32 v8, v15, v6 +; GFX9-NEXT: v_mul_hi_u32 v10, v15, v5 +; GFX9-NEXT: v_mul_hi_u32 v5, v17, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v14, v14, v19, vcc +; GFX9-NEXT: v_add_co_u32_e64 v7, s[2:3], v7, v8 +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[2:3], v7, v10, s[2:3] +; GFX9-NEXT: v_mul_lo_u32 v7, v17, v6 +; GFX9-NEXT: v_mul_hi_u32 v10, v15, v6 +; GFX9-NEXT: v_addc_co_u32_e64 v8, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_add_co_u32_e64 v5, s[2:3], v7, v5 +; GFX9-NEXT: v_mul_hi_u32 v6, v17, v6 +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[4:5], v5, v10, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[4:5], 0, 0, s[4:5] +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[2:3], v5, v8, s[2:3] +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[2:3], 0, v7, s[2:3] +; GFX9-NEXT: v_add_u32_e32 v6, v6, v7 +; GFX9-NEXT: v_add_co_u32_e64 v5, s[2:3], v15, v5 +; GFX9-NEXT: v_addc_co_u32_e64 v6, s[2:3], v17, v6, s[2:3] +; GFX9-NEXT: v_mul_lo_u32 v7, s15, v5 +; GFX9-NEXT: v_mul_lo_u32 v8, s14, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v4, v9, v14, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v20, vcc +; GFX9-NEXT: v_mul_hi_u32 v11, s14, v5 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v12, v21, vcc -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v11, v15, v6 -; GFX9-NEXT: v_mul_hi_u32 v5, v15, v5 -; GFX9-NEXT: v_add_u32_e32 v8, v9, v8 -; GFX9-NEXT: v_mul_hi_u32 v9, v16, v6 -; GFX9-NEXT: v_mul_hi_u32 v6, v15, v6 -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v11, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v8 -; GFX9-NEXT: v_add_u32_e32 v9, v11, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v6, v9, v8, v6 -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v16, v5 -; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v15, v6, vcc -; GFX9-NEXT: v_mul_lo_u32 v8, s11, v5 -; GFX9-NEXT: v_mul_lo_u32 v9, s10, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v7, v2, v7, s[0:1] -; GFX9-NEXT: v_mul_hi_u32 v2, s10, v5 -; GFX9-NEXT: v_mul_hi_u32 v5, s11, v5 -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v8, s11, v6 -; GFX9-NEXT: v_add_u32_e32 v2, v9, v2 -; GFX9-NEXT: v_mul_hi_u32 v9, s10, v6 -; GFX9-NEXT: v_mul_hi_u32 v13, s11, v6 -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v8, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v5, v2 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[2:3], s14, v12, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v8, v1, v10, s[0:1] -; GFX9-NEXT: v_add_u32_e32 v1, v11, v9 -; GFX9-NEXT: v_add3_u32 v9, v1, v2, v13 +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8 +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v11, vcc +; GFX9-NEXT: v_mul_lo_u32 v7, s15, v6 +; GFX9-NEXT: v_mul_hi_u32 v5, s15, v5 +; GFX9-NEXT: v_mul_hi_u32 v11, s14, v6 +; GFX9-NEXT: v_addc_co_u32_e64 v8, s[2:3], 0, 0, vcc +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v7, v5 +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[2:3], v5, v11, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v5, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[2:3], 0, 0, s[2:3] +; GFX9-NEXT: v_mul_hi_u32 v13, s15, v6 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[2:3], s18, v11, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, 0, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v7, v1, v9, s[0:1] +; GFX9-NEXT: v_add_u32_e32 v9, v13, v12 ; GFX9-NEXT: v_mov_b32_e32 v1, v6 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v9, v[1:2] -; GFX9-NEXT: v_mov_b32_e32 v10, s11 -; GFX9-NEXT: v_mov_b32_e32 v6, s15 -; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v12, v[1:2] -; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s10, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v8, v2, v10, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v9, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v10, s15 +; GFX9-NEXT: v_mov_b32_e32 v6, s19 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v11, v[1:2] +; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s14, v5 ; GFX9-NEXT: v_subb_co_u32_e64 v10, s[0:1], v10, v1, vcc -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v10 -; GFX9-NEXT: v_sub_u32_e32 v1, s11, v1 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s19, v10 +; GFX9-NEXT: v_sub_u32_e32 v1, s15, v1 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v10 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s18, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s19, v10 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[0:1] -; GFX9-NEXT: v_subrev_co_u32_e32 v11, vcc, s14, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[0:1] +; GFX9-NEXT: v_subrev_co_u32_e32 v12, vcc, s18, v2 ; GFX9-NEXT: v_subbrev_co_u32_e64 v13, s[0:1], 0, v1, vcc -; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v12 +; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v11 ; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v9, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v13 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s19, v13 ; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v11 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s18, v12 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v13 -; GFX9-NEXT: v_subrev_co_u32_e32 v19, vcc, s14, v11 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s19, v13 +; GFX9-NEXT: v_subrev_co_u32_e32 v19, vcc, s18, v12 ; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[0:1] ; GFX9-NEXT: v_add_co_u32_e64 v17, s[0:1], 1, v14 ; GFX9-NEXT: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc @@ -1498,28 +1404,31 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v14, v17, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v14, v15, v18, vcc ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, v12, v6, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v5, v11, v6, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v6, v9, v14, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v19, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v9, v12, v19, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v9, v2, v9, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[0:1] -; GFX9-NEXT: global_store_dwordx4 v0, v[3:6], s[4:5] -; GFX9-NEXT: global_store_dwordx4 v0, v[7:10], s[6:7] +; GFX9-NEXT: global_store_dwordx4 v0, v[3:6], s[8:9] +; GFX9-NEXT: global_store_dwordx4 v0, v[7:10], s[10:11] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: udivrem_v2i64: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x20 +; GFX10-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x20 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s13 -; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s15 -; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s12 -; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s14 -; GFX10-NEXT: s_sub_u32 s0, 0, s12 +; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s17 +; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s19 +; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s16 +; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s18 +; GFX10-NEXT: s_sub_u32 s7, 0, s16 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 -; GFX10-NEXT: s_subb_u32 s1, 0, s13 +; GFX10-NEXT: s_subb_u32 s20, 0, s17 +; GFX10-NEXT: s_sub_u32 s8, 0, s18 +; GFX10-NEXT: s_subb_u32 s21, 0, s19 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-NEXT: v_add_f32_e32 v1, v1, v3 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -1538,17 +1447,15 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX10-NEXT: v_add_f32_e32 v1, v3, v1 ; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v8, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s0, v7, 0 -; GFX10-NEXT: s_sub_u32 s2, 0, s14 -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s3, s2, v8, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s7, v7, 0 +; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s8, v8, 0 ; GFX10-NEXT: v_mul_hi_u32 v11, v9, v0 -; GFX10-NEXT: v_mad_u64_u32 v[4:5], s3, s0, v9, v[1:2] -; GFX10-NEXT: v_mad_u64_u32 v[5:6], s3, s2, v10, v[3:4] +; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s7, v9, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s8, v10, v[3:4] ; GFX10-NEXT: v_mul_lo_u32 v6, v9, v0 -; GFX10-NEXT: s_subb_u32 s3, 0, s15 -; GFX10-NEXT: v_mad_u64_u32 v[3:4], s6, s1, v7, v[4:5] +; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s20, v7, v[4:5] ; GFX10-NEXT: v_mul_hi_u32 v4, v7, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s6, s3, v8, v[5:6] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s21, v8, v[5:6] ; GFX10-NEXT: v_mul_lo_u32 v1, v10, v2 ; GFX10-NEXT: v_mul_hi_u32 v5, v8, v2 ; GFX10-NEXT: v_mul_hi_u32 v2, v10, v2 @@ -1560,46 +1467,38 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX10-NEXT: v_mul_hi_u32 v17, v8, v0 ; GFX10-NEXT: v_mul_hi_u32 v3, v9, v3 ; GFX10-NEXT: v_mul_hi_u32 v0, v10, v0 -; GFX10-NEXT: v_add_co_u32 v6, s6, v6, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v11, s6, v13, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v1, s6, v1, v15 -; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v2, s6, v16, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v4, s6, v6, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v6, s6, v11, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v1, s6, v1, v5 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v12, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v2, s6, v2, v17 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v4, s6, v6, v4 -; GFX10-NEXT: v_add_nc_u32_e32 v1, v15, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v11, v13, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s6 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v16, v5 -; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v7, v4 -; GFX10-NEXT: v_add_co_u32 v1, s6, v2, v1 -; GFX10-NEXT: v_add3_u32 v3, v11, v6, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s6 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v6, v12 +; GFX10-NEXT: v_add_co_u32 v1, s2, v1, v15 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v6, v4, vcc_lo +; GFX10-NEXT: v_add_co_u32 v4, s0, v13, v11 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s2, v1, v5, s2 +; GFX10-NEXT: v_add_co_u32 v1, s3, v16, v2 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s1, v4, v14, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s9, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s6, v1, v17, s3 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, vcc_lo, v4, v2, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s1, 0, 0, s2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v5, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v11, s1, 0, 0, s6 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, vcc_lo, v1, v6, s3 +; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v11, vcc_lo +; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v7, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v9, v3, vcc_lo -; GFX10-NEXT: v_add3_u32 v2, v5, v2, v0 ; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v8, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s6, s0, v7, 0 -; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v10, v2, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s6, s2, v8, 0 -; GFX10-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 +; GFX10-NEXT: v_add_nc_u32_e32 v4, v0, v5 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s7, v7, 0 +; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s8, v8, 0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v10, v4, vcc_lo ; GFX10-NEXT: v_mul_hi_u32 v11, v9, v0 -; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s0, v9, v[1:2] -; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s2, v10, v[3:4] +; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s7, v9, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s8, v10, v[3:4] ; GFX10-NEXT: v_mul_lo_u32 v6, v9, v0 -; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s1, v7, v[4:5] +; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 +; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s20, v7, v[4:5] ; GFX10-NEXT: v_mul_hi_u32 v4, v7, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s3, v8, v[5:6] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s21, v8, v[5:6] ; GFX10-NEXT: v_mul_lo_u32 v1, v10, v2 ; GFX10-NEXT: v_mul_hi_u32 v5, v8, v2 ; GFX10-NEXT: v_mul_hi_u32 v2, v10, v2 @@ -1611,156 +1510,142 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 ; GFX10-NEXT: v_mul_hi_u32 v17, v8, v0 ; GFX10-NEXT: v_mul_hi_u32 v3, v9, v3 ; GFX10-NEXT: v_mul_hi_u32 v0, v10, v0 -; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v11, s0, v13, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v15 -; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v2, s0, v16, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v4, s0, v6, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v6, s0, v11, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v12, v4 -; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v17 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v1, v15, v1 -; GFX10-NEXT: v_add_co_u32 v4, s0, v6, v4 -; GFX10-NEXT: v_add_nc_u32_e32 v11, v13, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v2, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v16, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 -; GFX10-NEXT: v_add3_u32 v3, v11, v6, v3 -; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v7, v4 -; GFX10-NEXT: v_add3_u32 v0, v5, v2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v9, v3, vcc_lo +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v6, v12 +; GFX10-NEXT: v_add_co_u32 v1, s2, v1, v15 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v6, v4, vcc_lo +; GFX10-NEXT: v_add_co_u32 v4, s0, v13, v11 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s2, v1, v5, s2 +; GFX10-NEXT: v_add_co_u32 v1, s3, v16, v2 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s1, v4, v14, s0 +; GFX10-NEXT: s_mov_b32 null, 0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s5, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, 0, 0, s1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s4, v1, v17, s3 +; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s1, 0, 0, s2 +; GFX10-NEXT: v_add_co_ci_u32_e64 v2, vcc_lo, v4, v2, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v11, s1, 0, 0, s4 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v5, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, vcc_lo, v1, v6, s3 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v11, vcc_lo +; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v7, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v5 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v9, v3, vcc_lo ; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v8, v1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v0, vcc_lo, v10, v0, vcc_lo ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mul_lo_u32 v3, s9, v4 -; GFX10-NEXT: v_mul_lo_u32 v8, s8, v2 -; GFX10-NEXT: v_mul_hi_u32 v5, s8, v4 -; GFX10-NEXT: v_mul_hi_u32 v4, s9, v4 -; GFX10-NEXT: v_mul_lo_u32 v9, s9, v2 -; GFX10-NEXT: v_mul_lo_u32 v6, s11, v1 -; GFX10-NEXT: v_mul_hi_u32 v10, s8, v2 -; GFX10-NEXT: v_mul_hi_u32 v11, s9, v2 -; GFX10-NEXT: v_mul_lo_u32 v2, s10, v0 -; GFX10-NEXT: v_mul_hi_u32 v7, s10, v1 -; GFX10-NEXT: v_mul_hi_u32 v1, s11, v1 -; GFX10-NEXT: v_mul_lo_u32 v12, s11, v0 -; GFX10-NEXT: v_mul_hi_u32 v13, s10, v0 -; GFX10-NEXT: v_mul_hi_u32 v14, s11, v0 -; GFX10-NEXT: v_add_co_u32 v0, s0, v3, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v4, s0, v9, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v2, s0, v6, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v12, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v0, s0, v0, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v4, s0, v4, v10 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v7 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v3, v0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v2, v6, v2 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v8, v5 -; GFX10-NEXT: v_add_co_u32 v8, s0, v4, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v10, s0, v1, v2 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, s12, v8, 0 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s14, v10, 0 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v9, v7 -; GFX10-NEXT: v_add3_u32 v9, v5, v4, v11 -; GFX10-NEXT: v_add_co_u32 v12, vcc_lo, v8, 1 +; GFX10-NEXT: v_mul_lo_u32 v4, s13, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v10, v0, vcc_lo +; GFX10-NEXT: v_mul_lo_u32 v0, s12, v3 +; GFX10-NEXT: v_mul_hi_u32 v5, s12, v2 +; GFX10-NEXT: v_mul_hi_u32 v2, s13, v2 +; GFX10-NEXT: v_mul_lo_u32 v7, s15, v1 +; GFX10-NEXT: v_mul_hi_u32 v8, s14, v1 +; GFX10-NEXT: v_mul_hi_u32 v9, s15, v1 +; GFX10-NEXT: v_mul_lo_u32 v1, s13, v3 +; GFX10-NEXT: v_mul_hi_u32 v10, s12, v3 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v4, v0 +; GFX10-NEXT: v_mul_lo_u32 v11, s14, v6 +; GFX10-NEXT: v_mul_hi_u32 v3, s13, v3 +; GFX10-NEXT: v_add_co_ci_u32_e32 v0, vcc_lo, v0, v5, vcc_lo +; GFX10-NEXT: v_add_co_u32 v0, s0, v1, v2 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_mul_lo_u32 v2, s15, v6 +; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s1, v0, v10, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s1, 0, 0, s1 +; GFX10-NEXT: v_mul_hi_u32 v5, s14, v6 +; GFX10-NEXT: v_add_co_ci_u32_e64 v10, vcc_lo, v0, v1, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v4, vcc_lo +; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v7, v11 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s16, v10, 0 +; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v9 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v7, v8, vcc_lo +; GFX10-NEXT: v_add_nc_u32_e32 v7, v3, v4 +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s1, v2, v5, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s2, 0, 0, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s1, 0, 0, s1 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s1, s16, v7, v[1:2] +; GFX10-NEXT: v_add_co_ci_u32_e64 v8, vcc_lo, v5, v3, s0 +; GFX10-NEXT: v_mul_hi_u32 v6, s15, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v4, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s18, v8, 0 ; GFX10-NEXT: v_mov_b32_e32 v11, 0 -; GFX10-NEXT: v_add3_u32 v7, v7, v6, v14 -; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s12, v9, v[1:2] -; GFX10-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, 0, v9, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s14, v7, v[3:4] -; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s13, v8, v[4:5] -; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v12, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v13, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v14, vcc_lo, s8, v0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s15, v10, v[5:6] -; GFX10-NEXT: v_sub_co_ci_u32_e64 v5, s0, s9, v3, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s12, v14 -; GFX10-NEXT: v_sub_nc_u32_e32 v1, s9, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, -1, s0 -; GFX10-NEXT: v_sub_co_u32 v15, s0, s10, v2 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, s13, v1, vcc_lo -; GFX10-NEXT: v_sub_co_ci_u32_e64 v16, s1, s11, v0, s0 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s14, v15 -; GFX10-NEXT: v_sub_nc_u32_e32 v0, s11, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v17, vcc_lo, v14, s12 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v18, s1, 0, v1, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e64 s1, s13, v5 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v23, s0, s15, v0, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s13, v18 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, s13, v1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v19, 0, -1, s1 -; GFX10-NEXT: v_cmp_le_u32_e64 s1, s12, v17 -; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, -1, s1 -; GFX10-NEXT: v_cmp_le_u32_e64 s1, s13, v18 -; GFX10-NEXT: v_cndmask_b32_e64 v21, 0, -1, s1 -; GFX10-NEXT: v_cmp_le_u32_e64 s1, s15, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v21, v20, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s13, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v22, 0, -1, s1 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s0 -; GFX10-NEXT: v_sub_co_u32 v0, s0, v17, s12 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v19, s0, 0, v1, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc_lo -; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v3 -; GFX10-NEXT: v_cndmask_b32_e32 v3, v13, v6, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v6, s1, v15, s14 -; GFX10-NEXT: v_cndmask_b32_e32 v4, v17, v0, vcc_lo -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v12, s2, 0, v23, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, v1, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, v3, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v3, v18, v19, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s15, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v14, v4, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v3, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v2, v22, v2, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s15, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s14, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc_lo -; GFX10-NEXT: v_add_co_u32 v13, vcc_lo, v10, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, 0, v7, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s15, v12 -; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc_lo -; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v13, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, 0, v14, vcc_lo -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v18, vcc_lo, s15, v23, s1 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 -; GFX10-NEXT: v_sub_co_u32 v8, s1, v6, s14 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v18, s1, 0, v18, s1 -; GFX10-NEXT: v_cndmask_b32_e32 v9, v13, v9, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v13, v14, v17, vcc_lo -; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v2 -; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v8, v12, v18, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v9, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v13, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v15, v6, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v16, v8, s1 -; GFX10-NEXT: global_store_dwordx4 v11, v[0:3], s[4:5] -; GFX10-NEXT: global_store_dwordx4 v11, v[4:7], s[6:7] +; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v9 +; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s17, v10, v[1:2] +; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v10, 1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v7, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v1, v3 +; GFX10-NEXT: v_sub_co_u32 v12, vcc_lo, s12, v0 +; GFX10-NEXT: v_sub_nc_u32_e32 v3, s13, v4 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v13, s0, s13, v4, vcc_lo +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s17, v3, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s16, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v14, vcc_lo, v12, s16 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v15, s0, 0, v3, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s17, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, -1, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s16, v14 +; GFX10-NEXT: v_cndmask_b32_e64 v17, 0, -1, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s17, v15 +; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, -1, s0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s18, v6, v[1:2] +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s17, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v16, v4, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s17, v15 +; GFX10-NEXT: v_cndmask_b32_e64 v4, v18, v17, s0 +; GFX10-NEXT: v_add_co_u32 v16, s0, v5, 1 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v18, vcc_lo, s17, v3, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v17, s0, 0, v9, s0 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 +; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s19, v8, v[0:1] +; GFX10-NEXT: v_sub_co_u32 v4, s0, v14, s16 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v5, v16, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v17, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v9, s1, s14, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v2, s15, v3 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v16, s2, s15, v3, s1 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v18, s0, 0, v18, s0 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v2, s1, s19, v2, s1 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 +; GFX10-NEXT: v_cmp_le_u32_e64 s1, s18, v9 +; GFX10-NEXT: v_cmp_le_u32_e64 s2, s19, v16 +; GFX10-NEXT: v_cndmask_b32_e32 v3, v14, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v15, v15, v18, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, v10, v0, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v7, v5, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, -1, s1 +; GFX10-NEXT: v_sub_co_u32 v10, s1, v9, s18 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, -1, s2 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v14, s2, 0, v2, s1 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s19, v16 +; GFX10-NEXT: v_cndmask_b32_e64 v4, v12, v3, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v3, v5, v7, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s19, v14 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s18, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc_lo +; GFX10-NEXT: v_add_co_u32 v12, vcc_lo, v8, 1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, 0, v6, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s19, v14 +; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo +; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v12, 1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, 0, v17, vcc_lo +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v2, vcc_lo, s19, v2, s1 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v5 +; GFX10-NEXT: v_sub_co_u32 v5, s1, v10, s18 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v2, s1, 0, v2, s1 +; GFX10-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v12, v17, v18, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v3 +; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v5, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v14, v14, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v5, v13, v15, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, v7, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v6, v12, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v9, v10, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v7, v16, v14, s1 +; GFX10-NEXT: global_store_dwordx4 v11, v[0:3], s[8:9] +; GFX10-NEXT: global_store_dwordx4 v11, v[4:7], s[10:11] ; GFX10-NEXT: s_endpgm %div = udiv <2 x i64> %x, %y store <2 x i64> %div, ptr addrspace(1) %out0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index cc0f7e2..4810540 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -16,10 +16,10 @@ define i64 @v_urem_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v2 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CHECK-NEXT: s_cbranch_execnz .LBB0_3 ; CHECK-NEXT: ; %bb.1: ; %Flow -; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CHECK-NEXT: s_cbranch_execnz .LBB0_4 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] @@ -50,18 +50,13 @@ define i64 @v_urem_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8 ; CHECK-NEXT: v_mul_hi_u32 v8, v6, v8 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v10, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v9, vcc, v9, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9 ; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc @@ -79,18 +74,13 @@ define i64 @v_urem_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v6, v1 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v8 +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v8, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v7, vcc, v7, v8, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v6, v1, vcc @@ -102,26 +92,21 @@ define i64 @v_urem_i64(i64 %num, i64 %den) { ; CHECK-NEXT: v_mul_hi_u32 v10, v4, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v5, v1 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_mul_lo_u32 v7, v2, v0 -; CHECK-NEXT: v_mul_lo_u32 v8, v3, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, v2, v0 +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CHECK-NEXT: v_add_i32_e64 v0, s[4:5], v9, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v6, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v0, vcc, v0, v6, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v7, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 +; CHECK-NEXT: v_mul_lo_u32 v6, v2, v0 +; CHECK-NEXT: v_mul_lo_u32 v7, v3, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v2, v0 ; CHECK-NEXT: v_mul_lo_u32 v1, v2, v1 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v7, v1 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v7 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v6 ; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v5, v0, vcc ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v5, v0 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2 @@ -151,7 +136,7 @@ define i64 @v_urem_i64(i64 %num, i64 %den) { ; CHECK-NEXT: ; implicit-def: $vgpr6 ; CHECK-NEXT: ; implicit-def: $vgpr2 ; CHECK-NEXT: ; implicit-def: $vgpr4 -; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CHECK-NEXT: s_cbranch_execz .LBB0_2 ; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v6 @@ -194,10 +179,10 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: v_mov_b32_e32 v0, s3 ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3 -; CHECK-NEXT: s_sub_u32 s4, 0, s2 +; CHECK-NEXT: s_sub_u32 s10, 0, s2 ; CHECK-NEXT: v_mov_b32_e32 v3, s1 ; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2 -; CHECK-NEXT: s_subb_u32 s5, 0, s3 +; CHECK-NEXT: s_subb_u32 s11, 0, s3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 ; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1 @@ -205,10 +190,10 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v1 -; CHECK-NEXT: v_mul_lo_u32 v7, s5, v1 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v5, s10, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s10, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s11, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, s10, v1 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v6 @@ -219,25 +204,20 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v10, v6 +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], v6, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v7, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v6, vcc, v6, v7, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v1 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v1 -; CHECK-NEXT: v_mul_hi_u32 v7, s4, v1 -; CHECK-NEXT: v_mul_lo_u32 v8, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v5, s10, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s11, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s10, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, s10, v4 ; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 ; CHECK-NEXT: v_mul_hi_u32 v10, v1, v5 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 @@ -248,18 +228,13 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6 ; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v10, vcc +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 +; CHECK-NEXT: v_addc_u32_e64 v5, s[6:7], v5, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v7, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v5, vcc, v5, v7, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc @@ -271,26 +246,21 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_mul_hi_u32 v9, s0, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, s1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_lo_u32 v6, s2, v1 -; CHECK-NEXT: v_mul_lo_u32 v7, s3, v1 -; CHECK-NEXT: v_mul_hi_u32 v1, s2, v1 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc +; CHECK-NEXT: v_add_i32_e64 v1, s[4:5], v8, v1 +; CHECK-NEXT: v_addc_u32_e64 v1, s[6:7], v1, v9, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v5, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v1, vcc, v1, v5, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v5, s2, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s3, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s2, v1 ; CHECK-NEXT: v_mul_lo_u32 v4, s2, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v4, v1 -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s0, v6 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s0, v5 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v1, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], s1, v1 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v4 @@ -362,229 +332,199 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v4 ; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v5 ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v4 -; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v6 -; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v7 +; GISEL-NEXT: v_cvt_f32_u32_e32 v12, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v7 ; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], 0, v6 -; GISEL-NEXT: v_subb_u32_e32 v15, vcc, 0, v5, vcc -; GISEL-NEXT: v_subb_u32_e64 v12, vcc, 0, v7, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, 0, v5, vcc +; GISEL-NEXT: v_subb_u32_e64 v15, vcc, 0, v7, s[4:5] ; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 -; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14 +; GISEL-NEXT: v_mac_f32_e32 v12, 0x4f800000, v13 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v13 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v12 ; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11 -; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v10 -; GISEL-NEXT: v_mul_f32_e32 v14, 0x2f800000, v11 +; GISEL-NEXT: v_mul_f32_e32 v12, 0x2f800000, v10 +; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v11 +; GISEL-NEXT: v_trunc_f32_e32 v12, v12 ; GISEL-NEXT: v_trunc_f32_e32 v13, v13 -; GISEL-NEXT: v_trunc_f32_e32 v14, v14 -; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v13 +; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v12 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v12 +; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v13 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 -; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v14 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v14 ; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 -; GISEL-NEXT: v_mul_lo_u32 v16, v8, v13 +; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v14 -; GISEL-NEXT: v_mul_lo_u32 v18, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v19, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 -; GISEL-NEXT: v_mul_lo_u32 v19, v14, v18 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_mul_hi_u32 v20, v11, v18 -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v19, v20 -; GISEL-NEXT: v_mul_lo_u32 v19, v8, v10 -; GISEL-NEXT: v_mul_lo_u32 v20, v15, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v20, v16 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v19, v14, v10 ; GISEL-NEXT: v_mul_hi_u32 v20, v8, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16 +; GISEL-NEXT: v_mul_lo_u32 v19, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 +; GISEL-NEXT: v_mul_lo_u32 v20, v10, v16 +; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v20, v10, v18 +; GISEL-NEXT: v_addc_u32_e32 v19, vcc, v19, v20, vcc +; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v20, v15, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v20, v17 +; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v20 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v19 -; GISEL-NEXT: v_mul_lo_u32 v21, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_mul_hi_u32 v21, v10, v19 -; GISEL-NEXT: v_add_i32_e64 v20, s[8:9], v20, v21 +; GISEL-NEXT: v_mul_lo_u32 v21, v11, v17 +; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v20, v21 +; GISEL-NEXT: v_mul_hi_u32 v21, v11, v19 +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], v20, v21, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 ; GISEL-NEXT: v_mul_hi_u32 v19, v13, v19 -; GISEL-NEXT: v_mul_hi_u32 v18, v14, v18 -; GISEL-NEXT: v_mul_lo_u32 v20, v13, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[12:13], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v12, v16 +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v13, v17 +; GISEL-NEXT: v_add_i32_e64 v19, s[8:9], v20, v19 ; GISEL-NEXT: v_mul_hi_u32 v20, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[14:15], v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v16, v12, v16 +; GISEL-NEXT: v_addc_u32_e64 v18, s[10:11], v18, v20, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[16:17], v18, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[10:11] -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[14:15] -; GISEL-NEXT: v_add_i32_e64 v21, s[6:7], v21, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[16:17] -; GISEL-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v22 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v19 -; GISEL-NEXT: v_mul_hi_u32 v16, v13, v16 -; GISEL-NEXT: v_mul_hi_u32 v17, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v21, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v23, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v8, v10 -; GISEL-NEXT: v_mul_lo_u32 v15, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18 -; GISEL-NEXT: v_mul_hi_u32 v18, v8, v10 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 -; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v12, v12, v11 -; GISEL-NEXT: v_addc_u32_e64 v13, vcc, v13, v16, s[6:7] +; GISEL-NEXT: v_mul_hi_u32 v17, v13, v17 +; GISEL-NEXT: v_addc_u32_e64 v19, s[12:13], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, v18, v20, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v19, s[4:5], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[6:7], 0, 0, s[10:11] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v18 +; GISEL-NEXT: v_addc_u32_e64 v18, s[8:9], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v20, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, 0, v18, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v19 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v20 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v14, v14, v10 +; GISEL-NEXT: v_mul_hi_u32 v19, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v20, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v15, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v12, v16, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11 -; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v17, s[8:9] -; GISEL-NEXT: v_mul_hi_u32 v17, v10, v20 -; GISEL-NEXT: v_mul_lo_u32 v8, v8, v13 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v15, v8 -; GISEL-NEXT: v_mul_hi_u32 v15, v11, v19 -; GISEL-NEXT: v_mul_lo_u32 v9, v9, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9 -; GISEL-NEXT: v_mul_lo_u32 v12, v13, v20 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v17, v10, v18 +; GISEL-NEXT: v_mul_lo_u32 v8, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, v11, v20 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v12, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v19 +; GISEL-NEXT: v_mul_lo_u32 v19, v13, v20 ; GISEL-NEXT: v_mul_hi_u32 v20, v13, v20 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v14, v19 -; GISEL-NEXT: v_mul_hi_u32 v19, v14, v19 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16 ; GISEL-NEXT: v_mul_lo_u32 v16, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; GISEL-NEXT: v_mul_lo_u32 v16, v13, v8 -; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 -; GISEL-NEXT: v_mul_hi_u32 v12, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v8 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8 ; GISEL-NEXT: v_mul_lo_u32 v17, v11, v9 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17 -; GISEL-NEXT: v_mul_hi_u32 v18, v11, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v16, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[10:11] -; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v19, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v19 -; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17 -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v18 -; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v19 -; GISEL-NEXT: v_mul_lo_u32 v16, v1, v10 -; GISEL-NEXT: v_mul_hi_u32 v17, v0, v10 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; GISEL-NEXT: v_mul_hi_u32 v19, v11, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v18, v20 +; GISEL-NEXT: v_addc_u32_e64 v15, s[10:11], v16, v15, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v17, v14, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v14, s[12:13], v18, v19, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v16, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, v15, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v14, v18, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, vcc, 0, v19, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v16 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v17 +; GISEL-NEXT: v_mul_lo_u32 v14, v1, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v0, v10 ; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v19, v2, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v3, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v2, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v12 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v13, v8, vcc -; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v14, v9, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v12, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v13, v9, s[4:5] ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v8 ; GISEL-NEXT: v_mul_lo_u32 v13, v1, v8 -; GISEL-NEXT: v_mul_hi_u32 v14, v0, v8 +; GISEL-NEXT: v_mul_hi_u32 v18, v0, v8 ; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 -; GISEL-NEXT: v_mul_lo_u32 v15, v2, v9 +; GISEL-NEXT: v_mul_lo_u32 v19, v2, v9 ; GISEL-NEXT: v_mul_lo_u32 v20, v3, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12 -; GISEL-NEXT: v_mul_hi_u32 v16, v2, v9 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v9 ; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v13, v10 -; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v18, v15 +; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v16, v19 ; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v20, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14 -; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v13, v19 -; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v14 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v16 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v16, v4, v10 -; GISEL-NEXT: v_mul_lo_u32 v17, v5, v10 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v12, v15, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, s[10:11], v10, v18, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], v13, v17, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v11, s[12:13], v11, v14, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v12, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v13, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v15, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v10, vcc, v10, v12, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], v11, v14, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v13, vcc, 0, v15, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v14, v4, v10 +; GISEL-NEXT: v_mul_lo_u32 v15, v5, v10 ; GISEL-NEXT: v_mul_hi_u32 v10, v4, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, v6, v11 -; GISEL-NEXT: v_mul_lo_u32 v19, v7, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v6, v11 +; GISEL-NEXT: v_mul_lo_u32 v17, v7, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v6, v11 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v16 -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v18 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v13 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v14 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v16 +; GISEL-NEXT: v_mul_lo_u32 v8, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v6, v9 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v0, v4 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v2, v6 ; GISEL-NEXT: v_sub_i32_e64 v12, s[10:11], v0, v4 ; GISEL-NEXT: v_sub_i32_e64 v13, s[12:13], v2, v6 -; GISEL-NEXT: v_mul_lo_u32 v8, v4, v8 -; GISEL-NEXT: v_mul_lo_u32 v9, v6, v9 +; GISEL-NEXT: v_add_i32_e64 v8, s[14:15], v15, v8 +; GISEL-NEXT: v_add_i32_e64 v9, s[14:15], v17, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7] ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[8:9] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v4 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v13, v6 ; GISEL-NEXT: v_sub_i32_e64 v4, s[14:15], v12, v4 ; GISEL-NEXT: v_sub_i32_e64 v6, s[16:17], v13, v6 -; GISEL-NEXT: v_add_i32_e64 v8, s[18:19], v17, v8 -; GISEL-NEXT: v_add_i32_e64 v9, s[18:19], v19, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v10 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v11 -; GISEL-NEXT: v_subb_u32_e64 v10, s[6:7], v1, v8, vcc +; GISEL-NEXT: v_add_i32_e64 v8, s[18:19], v8, v10 +; GISEL-NEXT: v_add_i32_e64 v9, s[18:19], v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[8:9] +; GISEL-NEXT: v_subb_u32_e64 v16, s[6:7], v1, v8, vcc ; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v8 ; GISEL-NEXT: v_subb_u32_e64 v8, s[6:7], v3, v9, s[4:5] ; GISEL-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v10, v5 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v16, v5 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v7 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v5 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v16, v5 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v8, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, vcc ; GISEL-NEXT: v_subbrev_u32_e64 v18, vcc, 0, v1, s[10:11] ; GISEL-NEXT: v_subb_u32_e64 v1, vcc, v1, v5, s[10:11] ; GISEL-NEXT: v_subbrev_u32_e64 v19, vcc, 0, v3, s[12:13] ; GISEL-NEXT: v_subb_u32_e64 v3, vcc, v3, v7, s[12:13] ; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v14, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v15, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v14, v17, v15, s[8:9] ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v18, v5 ; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[14:15] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v19, v7 @@ -594,9 +534,9 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v16, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v17, s[8:9] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v11, s[8:9] ; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v5 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7] @@ -605,7 +545,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[8:9] ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5] ; GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -622,7 +562,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v4 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB2_2 ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v5 @@ -650,18 +590,13 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 ; CGP-NEXT: v_mul_hi_u32 v12, v2, v12 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v16, vcc +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v17, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v13, v18, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v14, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v15, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v13, vcc, v13, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v15, vcc ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 ; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v12, vcc @@ -679,18 +614,13 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v16, v0, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v2, v1 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v15, vcc +; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v13, v12 +; CGP-NEXT: v_addc_u32_e64 v3, s[6:7], v3, v16, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v12, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v3, vcc, v3, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v3 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc @@ -702,26 +632,21 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v14, v10, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v11, v1 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 -; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v3 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_mul_lo_u32 v3, v4, v0 -; CGP-NEXT: v_mul_lo_u32 v12, v5, v0 -; CGP-NEXT: v_mul_hi_u32 v0, v4, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc +; CGP-NEXT: v_add_i32_e64 v0, s[4:5], v13, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v2, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v3, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v2, vcc, 0, v3, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_mul_lo_u32 v2, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v3, v5, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v4, v0 ; CGP-NEXT: v_mul_lo_u32 v1, v4, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v3 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v2 ; CGP-NEXT: v_subb_u32_e64 v2, s[4:5], v11, v0, vcc ; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v0 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4 @@ -752,7 +677,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr10 ; CGP-NEXT: .LBB2_2: ; %Flow1 -; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: s_cbranch_execz .LBB2_4 ; CGP-NEXT: ; %bb.3: ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v2 @@ -780,10 +705,10 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execnz .LBB2_7 ; CGP-NEXT: ; %bb.5: ; %Flow -; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: s_cbranch_execnz .LBB2_8 ; CGP-NEXT: .LBB2_6: ; CGP-NEXT: s_or_b64 exec, exec, s[4:5] @@ -814,18 +739,13 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v16, v2, v10 ; CGP-NEXT: v_mul_hi_u32 v10, v4, v10 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v14, vcc +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v15, v11 +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], v11, v16, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v12, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v11, vcc, v11, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v10, vcc @@ -843,18 +763,13 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v14, v2, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v4, v3 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v13, vcc +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v11, v10 +; CGP-NEXT: v_addc_u32_e64 v5, s[6:7], v5, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v10, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v5, vcc, v5, v10, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v10 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc @@ -866,26 +781,21 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_mul_hi_u32 v12, v8, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v9, v3 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_mul_lo_u32 v5, v6, v2 -; CGP-NEXT: v_mul_lo_u32 v10, v7, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v6, v2 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v11, v2 +; CGP-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v4, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v5, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v2, vcc, v2, v4, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v5, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_mul_lo_u32 v4, v6, v2 +; CGP-NEXT: v_mul_lo_u32 v5, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v6, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v6, v3 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v5 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v4 ; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v9, v2, vcc ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v9, v2 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v6 @@ -915,7 +825,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr6 ; CGP-NEXT: ; implicit-def: $vgpr8 -; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: s_cbranch_execz .LBB2_6 ; CGP-NEXT: .LBB2_8: ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4 @@ -994,18 +904,13 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v12, v3, v6 ; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v8, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v7, vcc, v7, v8, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc @@ -1022,18 +927,13 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v10, v6 +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], v6, v11, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v7, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v6, vcc, v6, v7, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc @@ -1045,24 +945,19 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_lo_u32 v6, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v8, v3 +; CHECK-NEXT: v_addc_u32_e64 v3, s[6:7], v3, v9, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v5, s[8:9], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v3, vcc, v3, v5, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v5, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 ; CHECK-NEXT: v_mul_lo_u32 v4, v4, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v6 +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v5 ; CHECK-NEXT: v_subb_u32_e64 v4, vcc, v1, v3, s[4:5] ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 @@ -1097,26 +992,26 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb ; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; GISEL-NEXT: s_sub_u32 s4, 0, 0x12d8fb +; GISEL-NEXT: s_sub_u32 s16, 0, 0x12d8fb ; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; GISEL-NEXT: s_subb_u32 s5, 0, 0 +; GISEL-NEXT: s_subb_u32 s17, 0, 0 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb +; GISEL-NEXT: s_sub_u32 s18, 0, 0x12d8fb ; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: s_subb_u32 s7, 0, 0 +; GISEL-NEXT: s_subb_u32 s19, 0, 0 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 ; GISEL-NEXT: v_trunc_f32_e32 v6, v6 ; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_lo_u32 v7, s4, v6 -; GISEL-NEXT: v_mul_lo_u32 v8, s6, v6 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v10, s5, v5 -; GISEL-NEXT: v_mul_hi_u32 v11, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v12, s6, v5 -; GISEL-NEXT: v_mul_lo_u32 v13, s7, v5 -; GISEL-NEXT: v_mul_hi_u32 v14, s6, v5 +; GISEL-NEXT: v_mul_lo_u32 v7, s16, v6 +; GISEL-NEXT: v_mul_lo_u32 v8, s18, v6 +; GISEL-NEXT: v_mul_lo_u32 v9, s16, v5 +; GISEL-NEXT: v_mul_lo_u32 v10, s17, v5 +; GISEL-NEXT: v_mul_hi_u32 v11, s16, v5 +; GISEL-NEXT: v_mul_lo_u32 v12, s18, v5 +; GISEL-NEXT: v_mul_lo_u32 v13, s19, v5 +; GISEL-NEXT: v_mul_hi_u32 v14, s18, v5 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GISEL-NEXT: v_mul_lo_u32 v10, v6, v9 ; GISEL-NEXT: v_mul_hi_u32 v15, v5, v9 @@ -1136,100 +1031,80 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_hi_u32 v20, v5, v8 ; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v19, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v15 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v16 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v10, v15, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v14, v9 +; GISEL-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v17, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v13, v18 +; GISEL-NEXT: v_addc_u32_e64 v10, s[8:9], v10, v16, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v10, s[10:11], v19, v12 +; GISEL-NEXT: v_addc_u32_e64 v10, s[12:13], v10, v20, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v11, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v9, v11, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, vcc, v10, v13, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v14, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v5, v9 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, s4, v9 -; GISEL-NEXT: v_mul_lo_u32 v11, s5, v9 -; GISEL-NEXT: v_mul_hi_u32 v13, s4, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; GISEL-NEXT: v_mul_lo_u32 v11, s16, v9 +; GISEL-NEXT: v_mul_lo_u32 v12, s17, v9 +; GISEL-NEXT: v_mul_hi_u32 v13, s16, v9 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 ; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc -; GISEL-NEXT: v_mul_lo_u32 v8, s6, v5 -; GISEL-NEXT: v_mul_lo_u32 v12, s7, v5 -; GISEL-NEXT: v_mul_hi_u32 v14, s6, v5 -; GISEL-NEXT: v_mul_lo_u32 v15, s4, v7 -; GISEL-NEXT: v_mul_lo_u32 v16, v7, v10 -; GISEL-NEXT: v_mul_hi_u32 v17, v9, v10 -; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, s6, v6 +; GISEL-NEXT: v_mul_lo_u32 v8, s18, v5 +; GISEL-NEXT: v_mul_lo_u32 v10, s19, v5 +; GISEL-NEXT: v_mul_hi_u32 v14, s18, v5 +; GISEL-NEXT: v_mul_lo_u32 v15, s16, v7 +; GISEL-NEXT: v_mul_lo_u32 v16, v7, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v9, v11 +; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11 +; GISEL-NEXT: v_mul_lo_u32 v18, s18, v6 ; GISEL-NEXT: v_mul_lo_u32 v19, v6, v8 ; GISEL-NEXT: v_mul_hi_u32 v20, v5, v8 ; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v18 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v14, v7, v11 -; GISEL-NEXT: v_mul_hi_u32 v15, v9, v11 -; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11 -; GISEL-NEXT: v_mul_lo_u32 v18, v5, v12 -; GISEL-NEXT: v_mul_lo_u32 v21, v6, v12 -; GISEL-NEXT: v_mul_hi_u32 v22, v5, v12 -; GISEL-NEXT: v_mul_hi_u32 v12, v6, v12 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v21, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v18 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v12 +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v12 +; GISEL-NEXT: v_mul_hi_u32 v15, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v7, v12 +; GISEL-NEXT: v_mul_lo_u32 v18, v5, v10 +; GISEL-NEXT: v_mul_lo_u32 v21, v6, v10 +; GISEL-NEXT: v_mul_hi_u32 v22, v5, v10 +; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v17 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v21, v18 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v1, v9 -; GISEL-NEXT: v_mul_hi_u32 v11, v0, v9 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v17, vcc +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 +; GISEL-NEXT: v_addc_u32_e64 v11, s[6:7], v11, v15, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v19, v18 +; GISEL-NEXT: v_addc_u32_e64 v13, s[8:9], v13, v20, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v8, s[10:11], v21, v8 +; GISEL-NEXT: v_addc_u32_e64 v8, s[12:13], v8, v22, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v13, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v15, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v16, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v11, vcc, v11, v13, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v14, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, vcc, v8, v15, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v16, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 +; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 ; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v12, vcc +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc ; GISEL-NEXT: v_mul_lo_u32 v8, v3, v5 -; GISEL-NEXT: v_mul_hi_u32 v12, v2, v5 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 ; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 ; GISEL-NEXT: v_mul_lo_u32 v13, v0, v7 ; GISEL-NEXT: v_mul_lo_u32 v14, v1, v7 @@ -1239,48 +1114,38 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_mul_lo_u32 v17, v3, v6 ; GISEL-NEXT: v_mul_hi_u32 v18, v2, v6 ; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v17, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v16, v8 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 -; GISEL-NEXT: v_mul_hi_u32 v9, v9, v4 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 -; GISEL-NEXT: v_mul_lo_u32 v12, v5, v4 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v12, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v14, v9 +; GISEL-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v15, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v8, v16 +; GISEL-NEXT: v_addc_u32_e64 v8, s[8:9], v8, v10, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v5, s[10:11], v17, v5 +; GISEL-NEXT: v_addc_u32_e64 v5, s[12:13], v5, v18, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v8, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v8, vcc, v9, v8, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, vcc, v5, v11, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v12, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, v8, v4 +; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v5, v4 ; GISEL-NEXT: v_mul_hi_u32 v5, v5, v4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v4 ; GISEL-NEXT: v_mul_lo_u32 v6, v6, v4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11 +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v9 ; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v1, v7, s[4:5] ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v12 +; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v10 ; GISEL-NEXT: v_subb_u32_e64 v8, vcc, v3, v5, s[6:7] ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v5 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 @@ -1350,18 +1215,13 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_hi_u32 v14, v5, v8 ; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v14 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v11, vcc +; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 +; CGP-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v10, s[8:9], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v9, vcc, v9, v10, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 ; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc @@ -1378,18 +1238,13 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_hi_u32 v13, v5, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v13 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v12, v8 +; CGP-NEXT: v_addc_u32_e64 v8, s[6:7], v8, v13, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v9, s[8:9], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v10, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v8, vcc, v8, v9, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc @@ -1408,47 +1263,37 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_mul_hi_u32 v18, v2, v6 ; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v16 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v17, v5 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v14 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v18 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v16, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v17, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v4 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v13, v9 +; CGP-NEXT: v_addc_u32_e64 v7, s[6:7], v7, v14, s[4:5] +; CGP-NEXT: v_add_i32_e64 v8, s[8:9], v10, v16 +; CGP-NEXT: v_addc_u32_e64 v8, s[8:9], v8, v11, s[8:9] +; CGP-NEXT: v_add_i32_e64 v5, s[10:11], v17, v5 +; CGP-NEXT: v_addc_u32_e64 v5, s[12:13], v5, v18, s[10:11] +; CGP-NEXT: v_addc_u32_e64 v8, s[14:15], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v10, s[6:7], 0, 0, s[8:9] +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[12:13] +; CGP-NEXT: v_addc_u32_e64 v7, vcc, v7, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc +; CGP-NEXT: v_addc_u32_e64 v5, vcc, v5, v10, s[10:11] +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8 +; CGP-NEXT: v_mul_lo_u32 v10, v7, v4 ; CGP-NEXT: v_mul_hi_u32 v7, v7, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v9, v5, v4 ; CGP-NEXT: v_mul_hi_u32 v5, v5, v4 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_mul_lo_u32 v8, v8, v4 ; CGP-NEXT: v_mul_lo_u32 v6, v6, v4 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v9 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v10 ; CGP-NEXT: v_subb_u32_e64 v6, vcc, v1, v7, s[4:5] ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v11 +; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v9 ; CGP-NEXT: v_subb_u32_e64 v8, vcc, v3, v5, s[6:7] ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v5 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 @@ -1508,10 +1353,10 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v5 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CHECK-NEXT: s_cbranch_execnz .LBB7_3 ; CHECK-NEXT: ; %bb.1: ; %Flow -; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CHECK-NEXT: s_cbranch_execnz .LBB7_4 ; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] @@ -1542,18 +1387,13 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v8 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v10, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v9, vcc, v9, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v8, vcc @@ -1571,18 +1411,13 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v8 +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v8, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v7, vcc, v7, v8, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc @@ -1594,26 +1429,21 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: v_mul_hi_u32 v10, v3, v1 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, v5, v0 -; CHECK-NEXT: v_mul_lo_u32 v8, v6, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v7, vcc +; CHECK-NEXT: v_add_i32_e64 v0, s[4:5], v9, v0 +; CHECK-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v10, s[4:5] +; CHECK-NEXT: v_addc_u32_e64 v2, s[10:11], 0, 0, vcc +; CHECK-NEXT: v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7] +; CHECK-NEXT: v_addc_u32_e64 v0, vcc, v0, v2, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v7, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_mul_lo_u32 v2, v5, v0 +; CHECK-NEXT: v_mul_lo_u32 v7, v6, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0 ; CHECK-NEXT: v_mul_lo_u32 v1, v5, v1 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v7, v1 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v3, v7 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v3, v2 ; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v4, v0, vcc ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v0 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v5 @@ -1643,7 +1473,7 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: ; implicit-def: $vgpr2 ; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6 ; CHECK-NEXT: ; implicit-def: $vgpr3 -; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CHECK-NEXT: s_cbranch_execz .LBB7_2 ; CHECK-NEXT: .LBB7_4: ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 @@ -1681,229 +1511,199 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v7 ; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v8 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v7 -; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v4 -; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v12, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v5 ; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], 0, v4 -; GISEL-NEXT: v_subb_u32_e32 v15, vcc, 0, v8, vcc -; GISEL-NEXT: v_subb_u32_e64 v12, vcc, 0, v5, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, 0, v8, vcc +; GISEL-NEXT: v_subb_u32_e64 v15, vcc, 0, v5, s[4:5] ; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 -; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14 +; GISEL-NEXT: v_mac_f32_e32 v12, 0x4f800000, v13 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v13 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v12 ; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11 -; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v10 -; GISEL-NEXT: v_mul_f32_e32 v14, 0x2f800000, v11 +; GISEL-NEXT: v_mul_f32_e32 v12, 0x2f800000, v10 +; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v11 +; GISEL-NEXT: v_trunc_f32_e32 v12, v12 ; GISEL-NEXT: v_trunc_f32_e32 v13, v13 -; GISEL-NEXT: v_trunc_f32_e32 v14, v14 -; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v13 +; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v12 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v12 +; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v13 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 -; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v14 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v14 ; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 -; GISEL-NEXT: v_mul_lo_u32 v16, v6, v13 +; GISEL-NEXT: v_mul_lo_u32 v16, v6, v12 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v14 -; GISEL-NEXT: v_mul_lo_u32 v18, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v19, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 -; GISEL-NEXT: v_mul_lo_u32 v19, v14, v18 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_mul_hi_u32 v20, v11, v18 -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v19, v20 -; GISEL-NEXT: v_mul_lo_u32 v19, v6, v10 -; GISEL-NEXT: v_mul_lo_u32 v20, v15, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v20, v16 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v19, v14, v10 ; GISEL-NEXT: v_mul_hi_u32 v20, v6, v10 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16 +; GISEL-NEXT: v_mul_lo_u32 v19, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 +; GISEL-NEXT: v_mul_lo_u32 v20, v10, v16 +; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v20, v10, v18 +; GISEL-NEXT: v_addc_u32_e32 v19, vcc, v19, v20, vcc +; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v20, v15, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v20, v17 +; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v20 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v19 -; GISEL-NEXT: v_mul_lo_u32 v21, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_mul_hi_u32 v21, v10, v19 -; GISEL-NEXT: v_add_i32_e64 v20, s[8:9], v20, v21 +; GISEL-NEXT: v_mul_lo_u32 v21, v11, v17 +; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v20, v21 +; GISEL-NEXT: v_mul_hi_u32 v21, v11, v19 +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], v20, v21, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 ; GISEL-NEXT: v_mul_hi_u32 v19, v13, v19 -; GISEL-NEXT: v_mul_hi_u32 v18, v14, v18 -; GISEL-NEXT: v_mul_lo_u32 v20, v13, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[12:13], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v12, v16 +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v20, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v13, v17 +; GISEL-NEXT: v_add_i32_e64 v19, s[8:9], v20, v19 ; GISEL-NEXT: v_mul_hi_u32 v20, v10, v16 -; GISEL-NEXT: v_add_i32_e64 v19, s[14:15], v19, v20 +; GISEL-NEXT: v_mul_hi_u32 v16, v12, v16 +; GISEL-NEXT: v_addc_u32_e64 v18, s[10:11], v18, v20, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v17 -; GISEL-NEXT: v_add_i32_e64 v18, s[16:17], v18, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[10:11] -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[14:15] -; GISEL-NEXT: v_add_i32_e64 v21, s[6:7], v21, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[16:17] -; GISEL-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v22 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v19 -; GISEL-NEXT: v_mul_hi_u32 v16, v13, v16 -; GISEL-NEXT: v_mul_hi_u32 v17, v14, v17 -; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v21, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v23, v19 -; GISEL-NEXT: v_mul_lo_u32 v20, v6, v10 -; GISEL-NEXT: v_mul_lo_u32 v15, v15, v10 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18 -; GISEL-NEXT: v_mul_hi_u32 v18, v6, v10 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 -; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v12, v12, v11 -; GISEL-NEXT: v_addc_u32_e64 v13, vcc, v13, v16, s[6:7] +; GISEL-NEXT: v_mul_hi_u32 v17, v13, v17 +; GISEL-NEXT: v_addc_u32_e64 v19, s[12:13], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, v18, v20, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v19, s[4:5], v19, v20, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v20, s[6:7], 0, 0, s[10:11] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v18 +; GISEL-NEXT: v_addc_u32_e64 v18, s[8:9], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v20, vcc +; GISEL-NEXT: v_addc_u32_e64 v18, vcc, 0, v18, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v19 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v20 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v14, v14, v10 +; GISEL-NEXT: v_mul_hi_u32 v19, v6, v10 +; GISEL-NEXT: v_mul_lo_u32 v20, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v15, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v12, v16, s[6:7] ; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11 -; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v17, s[8:9] -; GISEL-NEXT: v_mul_hi_u32 v17, v10, v20 -; GISEL-NEXT: v_mul_lo_u32 v6, v6, v13 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v15, v6 -; GISEL-NEXT: v_mul_hi_u32 v15, v11, v19 -; GISEL-NEXT: v_mul_lo_u32 v9, v9, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9 -; GISEL-NEXT: v_mul_lo_u32 v12, v13, v20 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v17, v10, v18 +; GISEL-NEXT: v_mul_lo_u32 v6, v6, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v14, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v11, v20 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v12, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v19 +; GISEL-NEXT: v_mul_lo_u32 v19, v13, v20 ; GISEL-NEXT: v_mul_hi_u32 v20, v13, v20 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v14, v19 -; GISEL-NEXT: v_mul_hi_u32 v19, v14, v19 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16 ; GISEL-NEXT: v_mul_lo_u32 v16, v10, v6 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; GISEL-NEXT: v_mul_lo_u32 v16, v13, v6 -; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 -; GISEL-NEXT: v_mul_hi_u32 v12, v10, v6 -; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v6 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v17, vcc +; GISEL-NEXT: v_mul_hi_u32 v15, v10, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v6 ; GISEL-NEXT: v_mul_lo_u32 v17, v11, v9 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20 -; GISEL-NEXT: v_mul_lo_u32 v20, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17 -; GISEL-NEXT: v_mul_hi_u32 v18, v11, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9 -; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v16, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[10:11] -; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v19, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v20, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v19 -; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17 -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v18 -; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v19 -; GISEL-NEXT: v_mul_lo_u32 v16, v1, v10 -; GISEL-NEXT: v_mul_hi_u32 v17, v0, v10 +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; GISEL-NEXT: v_mul_hi_u32 v19, v11, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v18, v20 +; GISEL-NEXT: v_addc_u32_e64 v15, s[10:11], v16, v15, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v17, v14, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v14, s[12:13], v18, v19, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v16, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, v15, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v14, v18, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, vcc, 0, v19, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v16 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v17 +; GISEL-NEXT: v_mul_lo_u32 v14, v1, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v0, v10 ; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, v3, v11 -; GISEL-NEXT: v_mul_hi_u32 v19, v2, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v3, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v2, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v12 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v13, v6, vcc -; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v14, v9, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v12, v6, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v13, v9, s[4:5] ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v6 ; GISEL-NEXT: v_mul_lo_u32 v13, v1, v6 -; GISEL-NEXT: v_mul_hi_u32 v14, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, v0, v6 ; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 -; GISEL-NEXT: v_mul_lo_u32 v15, v2, v9 +; GISEL-NEXT: v_mul_lo_u32 v19, v2, v9 ; GISEL-NEXT: v_mul_lo_u32 v20, v3, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12 -; GISEL-NEXT: v_mul_hi_u32 v16, v2, v9 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v9 ; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v13, v10 -; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v18, v15 +; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v16, v19 ; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v20, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14 -; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v13, v19 -; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v14 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v16 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v16, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v17, v8, v10 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v12, v15, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, s[10:11], v10, v18, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], v13, v17, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v11, s[12:13], v11, v14, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v12, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v13, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v15, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v10, vcc, v10, v12, s[4:5] +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], v11, v14, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v13, vcc, 0, v15, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v10 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v10 ; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v18, v4, v11 -; GISEL-NEXT: v_mul_lo_u32 v19, v5, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v4, v11 +; GISEL-NEXT: v_mul_lo_u32 v17, v5, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v4, v11 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v16 -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v18 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v13 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v14 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v16 +; GISEL-NEXT: v_mul_lo_u32 v6, v7, v6 +; GISEL-NEXT: v_mul_lo_u32 v9, v4, v9 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v0, v7 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v2, v4 ; GISEL-NEXT: v_sub_i32_e64 v12, s[10:11], v0, v7 ; GISEL-NEXT: v_sub_i32_e64 v13, s[12:13], v2, v4 -; GISEL-NEXT: v_mul_lo_u32 v6, v7, v6 -; GISEL-NEXT: v_mul_lo_u32 v9, v4, v9 +; GISEL-NEXT: v_add_i32_e64 v6, s[14:15], v15, v6 +; GISEL-NEXT: v_add_i32_e64 v9, s[14:15], v17, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7] ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[8:9] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v7 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v13, v4 ; GISEL-NEXT: v_sub_i32_e64 v7, s[14:15], v12, v7 ; GISEL-NEXT: v_sub_i32_e64 v4, s[16:17], v13, v4 -; GISEL-NEXT: v_add_i32_e64 v6, s[18:19], v17, v6 -; GISEL-NEXT: v_add_i32_e64 v9, s[18:19], v19, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v10 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v11 -; GISEL-NEXT: v_subb_u32_e64 v10, s[6:7], v1, v6, vcc +; GISEL-NEXT: v_add_i32_e64 v6, s[18:19], v6, v10 +; GISEL-NEXT: v_add_i32_e64 v9, s[18:19], v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[8:9] +; GISEL-NEXT: v_subb_u32_e64 v16, s[6:7], v1, v6, vcc ; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v6 ; GISEL-NEXT: v_subb_u32_e64 v6, s[6:7], v3, v9, s[4:5] ; GISEL-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v10, v8 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v16, v8 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v5 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v5, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v8 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v16, v8 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, vcc ; GISEL-NEXT: v_subbrev_u32_e64 v18, vcc, 0, v1, s[10:11] ; GISEL-NEXT: v_subb_u32_e64 v1, vcc, v1, v8, s[10:11] ; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v14, s[4:5] ; GISEL-NEXT: v_subbrev_u32_e64 v14, vcc, 0, v3, s[12:13] ; GISEL-NEXT: v_subb_u32_e64 v3, vcc, v3, v5, s[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v15, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v15, v17, v15, s[8:9] ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v18, v8 ; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[14:15] ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v5 @@ -1913,9 +1713,9 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v16, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v17, s[8:9] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v11, s[8:9] ; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v5 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v8 ; GISEL-NEXT: v_cndmask_b32_e64 v5, v12, v7, s[6:7] @@ -1924,7 +1724,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-NEXT: v_cndmask_b32_e64 v3, v14, v3, s[8:9] ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v3, v6, v3, s[4:5] ; GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -1944,7 +1744,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_2 ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3 @@ -1972,18 +1772,13 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v19, v0, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v13 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v18, v14 -; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v17 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v17, vcc +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 +; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], v14, v19, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v15, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v16, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v14, vcc, v14, v15, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v16, vcc ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc @@ -2001,18 +1796,13 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v17, v0, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v4, v1 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v16, vcc +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v13 +; CGP-NEXT: v_addc_u32_e64 v12, s[6:7], v12, v17, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v13, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v12, vcc, v12, v13, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v14, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc @@ -2024,26 +1814,21 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v15, v8, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v9, v1 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v15 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; CGP-NEXT: v_mul_lo_u32 v12, v2, v0 -; CGP-NEXT: v_mul_lo_u32 v13, v3, v0 -; CGP-NEXT: v_mul_hi_u32 v0, v2, v0 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v12, vcc +; CGP-NEXT: v_add_i32_e64 v0, s[4:5], v14, v0 +; CGP-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v15, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v4, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v0, vcc, v0, v4, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v12, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v4, v2, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v3, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v2, v0 ; CGP-NEXT: v_mul_lo_u32 v1, v2, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v12 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v4 ; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v9, v0, vcc ; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v9, v0 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2 @@ -2074,7 +1859,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 ; CGP-NEXT: .LBB8_2: ; %Flow1 -; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: v_lshl_b64 v[9:10], v[10:11], v6 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_4 @@ -2104,10 +1889,10 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v9 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] ; CGP-NEXT: s_cbranch_execnz .LBB8_7 ; CGP-NEXT: ; %bb.5: ; %Flow -; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: s_cbranch_execnz .LBB8_8 ; CGP-NEXT: .LBB8_6: ; CGP-NEXT: s_or_b64 exec, exec, s[4:5] @@ -2138,18 +1923,13 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v16, v2, v8 ; CGP-NEXT: v_mul_hi_u32 v8, v4, v8 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v14, vcc +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v15, v11 +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], v11, v16, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v12, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v11, vcc, v11, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc @@ -2167,18 +1947,13 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v14, v2, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v4, v3 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v13, vcc +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v11, v8 +; CGP-NEXT: v_addc_u32_e64 v6, s[6:7], v6, v14, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v8, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v6, vcc, v6, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v11, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc @@ -2190,26 +1965,21 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v12, v5, v3 ; CGP-NEXT: v_mul_hi_u32 v3, v7, v3 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CGP-NEXT: v_mul_lo_u32 v6, v9, v2 -; CGP-NEXT: v_mul_lo_u32 v8, v10, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v9, v2 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v11, v2 +; CGP-NEXT: v_addc_u32_e64 v2, s[6:7], v2, v12, s[4:5] +; CGP-NEXT: v_addc_u32_e64 v4, s[10:11], 0, 0, vcc +; CGP-NEXT: v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7] +; CGP-NEXT: v_addc_u32_e64 v2, vcc, v2, v4, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v6, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_mul_lo_u32 v4, v9, v2 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v9, v2 ; CGP-NEXT: v_mul_lo_u32 v3, v9, v3 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v6 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v4 ; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v7, v2, vcc ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v7, v2 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v9 @@ -2239,7 +2009,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; CGP-NEXT: ; implicit-def: $vgpr5 -; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: s_cbranch_execz .LBB8_6 ; CGP-NEXT: .LBB8_8: ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4 @@ -2376,39 +2146,29 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_hi_u32 v25, v7, v13 ; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v23 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v24, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v25 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v20 -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v18 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v24, v21 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v20, vcc +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v19, v14 +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v14, v22, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[8:9], v18, v23 +; GISEL-NEXT: v_addc_u32_e64 v15, s[8:9], v15, v21, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v15, s[10:11], v24, v17 +; GISEL-NEXT: v_addc_u32_e64 v15, s[12:13], v15, v25, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v16, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v17, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v16, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc +; GISEL-NEXT: v_addc_u32_e64 v15, vcc, v15, v18, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v19, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc ; GISEL-NEXT: v_mul_lo_u32 v12, v4, v6 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v6 ; GISEL-NEXT: v_mul_hi_u32 v14, v4, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v15 ; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v7 ; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7 @@ -2434,39 +2194,29 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_mul_hi_u32 v21, v7, v5 ; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v20, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v15 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v17 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v17, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v10, v12 +; GISEL-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v18, v15 +; GISEL-NEXT: v_addc_u32_e64 v10, s[8:9], v10, v19, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v10, s[10:11], v20, v13 +; GISEL-NEXT: v_addc_u32_e64 v10, s[12:13], v10, v21, s[10:11] +; GISEL-NEXT: v_addc_u32_e64 v12, s[14:15], 0, 0, vcc +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7] +; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, 0, s[8:9] +; GISEL-NEXT: v_addc_u32_e64 v15, s[6:7], 0, 0, s[12:13] +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v9, v12, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, vcc, v10, v14, s[10:11] +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v15, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 ; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v8, v4, vcc ; GISEL-NEXT: v_mul_lo_u32 v8, 0, v6 ; GISEL-NEXT: v_mul_hi_u32 v9, v3, v6 ; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc ; GISEL-NEXT: v_mul_lo_u32 v10, 0, v7 ; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7 @@ -2484,39 +2234,37 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6 -; GISEL-NEXT: v_mul_lo_u32 v12, 0, v6 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v8, v1, v6 +; GISEL-NEXT: v_mul_lo_u32 v10, 0, v6 ; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_lo_u32 v11, v0, v7 -; GISEL-NEXT: v_mul_lo_u32 v13, 0, v7 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v11, 0, v7 ; GISEL-NEXT: v_mul_hi_u32 v7, v0, v7 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 ; GISEL-NEXT: v_mul_lo_u32 v4, v1, v4 ; GISEL-NEXT: v_mul_lo_u32 v5, v0, v5 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v13, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v9 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v8 ; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], 0, v4, vcc ; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], 0, v4 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v11 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v9 ; GISEL-NEXT: v_subb_u32_e64 v8, s[6:7], 0, v5, s[4:5] ; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], 0, v5 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir index 674d7b6..2e61d87 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir @@ -270,6 +270,7 @@ body: | ; MIPS32-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 ; MIPS32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; MIPS32-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY]] ; MIPS32-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY1]] ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY]] @@ -277,33 +278,67 @@ body: | ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]] ; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[UMULH]] - ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] - ; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) - ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], [[ICMP1]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[COPY4]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C]] + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]] + ; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32) + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C1]] + ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[C]], [[AND1]] + ; MIPS32-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32) ; MIPS32-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY]] ; MIPS32-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY1]] ; MIPS32-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY2]] ; MIPS32-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY]] ; MIPS32-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY1]] - ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] - ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[MUL4]] - ; MIPS32-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32) - ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[MUL5]] - ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL5]] + ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL4]] ; MIPS32-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD4]](s32) - ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ICMP2]], [[ICMP3]] - ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[COPY7]], [[UMULH1]] - ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[UMULH1]] + ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY7]], [[MUL5]] + ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD5]](s32), [[COPY7]] + ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP3]] + ; MIPS32-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD6]](s32), [[C]] + ; MIPS32-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[ICMP3]] + ; MIPS32-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP4]], [[AND2]] ; MIPS32-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD6]](s32) - ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP4]] - ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[UMULH2]] - ; MIPS32-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[UMULH2]] - ; MIPS32-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ADD8]](s32) - ; MIPS32-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ICMP5]] - ; MIPS32-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[COPY9]], [[ADD2]] - ; MIPS32-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD10]](s32), [[ADD2]] - ; MIPS32-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD10]](s32) - ; MIPS32-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ICMP6]] + ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C1]] + ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[C]], [[AND3]] + ; MIPS32-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ADD7]](s32) + ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[UMULH1]] + ; MIPS32-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[COPY8]] + ; MIPS32-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD8]], [[ICMP3]] + ; MIPS32-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD9]](s32), [[C]] + ; MIPS32-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP7]], [[ICMP3]] + ; MIPS32-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ICMP6]], [[AND4]] + ; MIPS32-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD9]](s32) + ; MIPS32-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[COPY9]], [[C]] + ; MIPS32-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C1]] + ; MIPS32-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[AND5]] + ; MIPS32-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[ADD11]](s32) + ; MIPS32-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[COPY10]], [[UMULH2]] + ; MIPS32-NEXT: [[ICMP8:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD12]](s32), [[COPY10]] + ; MIPS32-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[ICMP3]] + ; MIPS32-NEXT: [[ICMP9:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD13]](s32), [[C]] + ; MIPS32-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP9]], [[ICMP3]] + ; MIPS32-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ICMP8]], [[AND6]] + ; MIPS32-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ADD13]](s32) + ; MIPS32-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[COPY11]], [[C]] + ; MIPS32-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C1]] + ; MIPS32-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[AND7]] + ; MIPS32-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[ADD15]](s32) + ; MIPS32-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[COPY12]], [[COPY6]] + ; MIPS32-NEXT: [[ICMP10:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD16]](s32), [[COPY12]] + ; MIPS32-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[ICMP3]] + ; MIPS32-NEXT: [[ICMP11:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD17]](s32), [[C]] + ; MIPS32-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ICMP11]], [[ICMP3]] + ; MIPS32-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ICMP10]], [[AND8]] + ; MIPS32-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[ADD17]](s32) + ; MIPS32-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[COPY13]], [[C]] + ; MIPS32-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C1]] + ; MIPS32-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[AND9]] + ; MIPS32-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[ADD19]](s32) ; MIPS32-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[LOAD3]], [[COPY]] ; MIPS32-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY1]] ; MIPS32-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY2]] @@ -311,17 +346,17 @@ body: | ; MIPS32-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LOAD2]], [[COPY]] ; MIPS32-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY1]] ; MIPS32-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY2]] - ; MIPS32-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL6]], [[MUL7]] - ; MIPS32-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL8]] - ; MIPS32-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[MUL9]] - ; MIPS32-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[UMULH3]] - ; MIPS32-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ADD15]], [[UMULH4]] - ; MIPS32-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH5]] - ; MIPS32-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ADD11]] + ; MIPS32-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[MUL6]], [[MUL7]] + ; MIPS32-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ADD20]], [[MUL8]] + ; MIPS32-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[MUL9]] + ; MIPS32-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[UMULH3]] + ; MIPS32-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[ADD23]], [[UMULH4]] + ; MIPS32-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH5]] + ; MIPS32-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ADD25]], [[COPY15]] ; MIPS32-NEXT: $v0 = COPY [[MUL]](s32) ; MIPS32-NEXT: $v1 = COPY [[COPY5]](s32) - ; MIPS32-NEXT: $a0 = COPY [[COPY10]](s32) - ; MIPS32-NEXT: $a1 = COPY [[ADD18]](s32) + ; MIPS32-NEXT: $a0 = COPY [[COPY14]](s32) + ; MIPS32-NEXT: $a1 = COPY [[ADD26]](s32) ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 @@ -361,6 +396,7 @@ body: | ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY]] ; MIPS32-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY1]] ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY]] @@ -368,27 +404,47 @@ body: | ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL1]] ; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[UMULH]] - ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] - ; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) - ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], [[ICMP1]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[COPY4]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C]] + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]] + ; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32) + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C1]] + ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[C]], [[AND1]] + ; MIPS32-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32) ; MIPS32-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY1]] ; MIPS32-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY3]], [[COPY]] ; MIPS32-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY1]] - ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL2]], [[UMULH1]] - ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[UMULH1]] - ; MIPS32-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32) - ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[UMULH2]] - ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[UMULH2]] + ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL2]], [[UMULH1]] + ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[UMULH1]] ; MIPS32-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD4]](s32) - ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ICMP2]], [[ICMP3]] - ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[COPY7]], [[ADD2]] - ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[ADD2]] + ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY7]], [[UMULH2]] + ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD5]](s32), [[COPY7]] + ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP3]] + ; MIPS32-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD6]](s32), [[C]] + ; MIPS32-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[ICMP3]] + ; MIPS32-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP4]], [[AND2]] ; MIPS32-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD6]](s32) - ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP4]] + ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C1]] + ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[C]], [[AND3]] + ; MIPS32-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ADD7]](s32) + ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[COPY6]] + ; MIPS32-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[COPY8]] + ; MIPS32-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD8]], [[ICMP3]] + ; MIPS32-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD9]](s32), [[C]] + ; MIPS32-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP7]], [[ICMP3]] + ; MIPS32-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ICMP6]], [[AND4]] + ; MIPS32-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD9]](s32) + ; MIPS32-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[COPY9]], [[C]] + ; MIPS32-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C1]] + ; MIPS32-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[AND5]] + ; MIPS32-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[ADD11]](s32) ; MIPS32-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[COPY3]], [[COPY1]] - ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD7]] - ; MIPS32-NEXT: $v0 = COPY [[COPY8]](s32) - ; MIPS32-NEXT: $v1 = COPY [[ADD8]](s32) + ; MIPS32-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[COPY11]] + ; MIPS32-NEXT: $v0 = COPY [[COPY10]](s32) + ; MIPS32-NEXT: $v1 = COPY [[ADD12]](s32) ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll index c8c66fa..9ef2228 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll @@ -117,35 +117,64 @@ define i128 @mul_i128(i128 %a, i128 %b) { ; MIPS32-NEXT: addiu $1, $sp, 28 ; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: mul $2, $6, $14 -; MIPS32-NEXT: mul $3, $7, $14 -; MIPS32-NEXT: mul $4, $6, $13 +; MIPS32-NEXT: ori $5, $zero, 0 +; MIPS32-NEXT: mul $4, $7, $14 +; MIPS32-NEXT: mul $10, $6, $13 ; MIPS32-NEXT: multu $6, $14 -; MIPS32-NEXT: mfhi $5 -; MIPS32-NEXT: addu $3, $3, $4 +; MIPS32-NEXT: mfhi $3 +; MIPS32-NEXT: addu $4, $4, $10 +; MIPS32-NEXT: sltu $11, $4, $10 +; MIPS32-NEXT: addu $3, $4, $3 ; MIPS32-NEXT: sltu $4, $3, $4 -; MIPS32-NEXT: addu $3, $3, $5 -; MIPS32-NEXT: sltu $5, $3, $5 -; MIPS32-NEXT: addu $10, $4, $5 -; MIPS32-NEXT: mul $4, $8, $14 -; MIPS32-NEXT: mul $5, $7, $13 +; MIPS32-NEXT: addu $3, $3, $11 +; MIPS32-NEXT: sltiu $10, $3, 1 +; MIPS32-NEXT: and $10, $10, $11 +; MIPS32-NEXT: or $4, $4, $10 +; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: addu $4, $5, $4 +; MIPS32-NEXT: mul $25, $8, $14 +; MIPS32-NEXT: mul $15, $7, $13 ; MIPS32-NEXT: mul $24, $6, $12 ; MIPS32-NEXT: multu $7, $14 -; MIPS32-NEXT: mfhi $15 -; MIPS32-NEXT: multu $6, $13 ; MIPS32-NEXT: mfhi $11 -; MIPS32-NEXT: addu $4, $4, $5 -; MIPS32-NEXT: sltu $5, $4, $5 -; MIPS32-NEXT: addu $4, $4, $24 -; MIPS32-NEXT: sltu $24, $4, $24 +; MIPS32-NEXT: multu $6, $13 +; MIPS32-NEXT: mfhi $10 +; MIPS32-NEXT: addu $25, $25, $15 +; MIPS32-NEXT: sltu $15, $25, $15 +; MIPS32-NEXT: addu $24, $25, $24 +; MIPS32-NEXT: sltu $25, $24, $25 +; MIPS32-NEXT: addu $24, $24, $15 +; MIPS32-NEXT: sltiu $gp, $24, 1 +; MIPS32-NEXT: and $gp, $gp, $15 +; MIPS32-NEXT: or $25, $25, $gp +; MIPS32-NEXT: andi $25, $25, 1 +; MIPS32-NEXT: addu $5, $5, $25 +; MIPS32-NEXT: addu $11, $24, $11 +; MIPS32-NEXT: sltu $24, $11, $24 +; MIPS32-NEXT: addu $11, $11, $15 +; MIPS32-NEXT: sltiu $25, $11, 1 +; MIPS32-NEXT: and $25, $25, $15 +; MIPS32-NEXT: or $24, $24, $25 +; MIPS32-NEXT: addiu $5, $5, 0 +; MIPS32-NEXT: andi $24, $24, 1 ; MIPS32-NEXT: addu $5, $5, $24 -; MIPS32-NEXT: addu $4, $4, $15 -; MIPS32-NEXT: sltu $15, $4, $15 -; MIPS32-NEXT: addu $5, $5, $15 -; MIPS32-NEXT: addu $4, $4, $11 -; MIPS32-NEXT: sltu $11, $4, $11 +; MIPS32-NEXT: addu $10, $11, $10 +; MIPS32-NEXT: sltu $11, $10, $11 +; MIPS32-NEXT: addu $10, $10, $15 +; MIPS32-NEXT: sltiu $24, $10, 1 +; MIPS32-NEXT: and $24, $24, $15 +; MIPS32-NEXT: or $11, $11, $24 +; MIPS32-NEXT: addiu $5, $5, 0 +; MIPS32-NEXT: andi $11, $11, 1 ; MIPS32-NEXT: addu $5, $5, $11 -; MIPS32-NEXT: addu $4, $4, $10 +; MIPS32-NEXT: addu $4, $10, $4 ; MIPS32-NEXT: sltu $10, $4, $10 +; MIPS32-NEXT: addu $4, $4, $15 +; MIPS32-NEXT: sltiu $11, $4, 1 +; MIPS32-NEXT: and $11, $11, $15 +; MIPS32-NEXT: or $10, $10, $11 +; MIPS32-NEXT: addiu $5, $5, 0 +; MIPS32-NEXT: andi $10, $10, 1 ; MIPS32-NEXT: addu $5, $5, $10 ; MIPS32-NEXT: mul $1, $1, $14 ; MIPS32-NEXT: mul $11, $8, $13 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv32.mir index ec2dc56..d63d03e 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv32.mir @@ -164,27 +164,39 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]] ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[UMULH]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[COPY]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[C1]], [[OR]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32) ; CHECK-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL %hi1, %lo2 ; CHECK-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL %mid1, %mid2 ; CHECK-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL %lo1, %hi2 ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH %mid1, %lo2 ; CHECK-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH %lo1, %mid2 - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[MUL5]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL4]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ADD4]](s32) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[UMULH1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD5]](s32) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[UMULH2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD6]](s32) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[COPY5]], [[ADD2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD7]](s32) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[MUL5]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP3]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD6]](s32) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[UMULH1]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ICMP3]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD8]](s32) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[COPY5]], [[UMULH2]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ICMP3]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD10]](s32) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[COPY2]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ADD11]], [[ICMP3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD12]](s32) ; CHECK-NEXT: $x10 = COPY [[MUL]](s32) ; CHECK-NEXT: $x11 = COPY [[COPY1]](s32) - ; CHECK-NEXT: $x12 = COPY [[COPY6]](s32) + ; CHECK-NEXT: $x12 = COPY [[COPY7]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11, implicit $x12 %lo1:_(s32) = COPY $x10 %mid1:_(s32) = COPY $x11 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv64.mir index 39d9c5b..931e3f5c 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv64.mir @@ -196,27 +196,45 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD]](s64), [[MUL2]] ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[ADD]](s64) ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[UMULH]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD1]](s64), [[UMULH]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[ADD1]](s64) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD1]](s64), [[COPY]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[ICMP]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ADD2]](s64), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP2]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP1]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC2]], [[AND]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[ADD2]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s64) = G_ADD [[C1]], [[AND1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ADD3]](s64) ; CHECK-NEXT: [[MUL3:%[0-9]+]]:_(s64) = G_MUL %hi1, %lo2 ; CHECK-NEXT: [[MUL4:%[0-9]+]]:_(s64) = G_MUL %mid1, %mid2 ; CHECK-NEXT: [[MUL5:%[0-9]+]]:_(s64) = G_MUL %lo1, %hi2 ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(s64) = G_UMULH %mid1, %lo2 ; CHECK-NEXT: [[UMULH2:%[0-9]+]]:_(s64) = G_UMULH %lo1, %mid2 - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s64) = G_ADD [[MUL3]], [[MUL4]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ADD3]](s64) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s64) = G_ADD [[COPY2]], [[MUL5]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s64) = G_ADD [[MUL3]], [[MUL4]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD4]](s64), [[MUL4]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[ADD4]](s64) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s64) = G_ADD [[COPY3]], [[UMULH1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[ADD5]](s64) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s64) = G_ADD [[COPY4]], [[UMULH2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[ADD6]](s64) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s64) = G_ADD [[COPY5]], [[ADD2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[ADD7]](s64) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s64) = G_ADD [[COPY3]], [[MUL5]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s64) = G_ADD [[ADD5]], [[ICMP3]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[ADD6]](s64) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s64) = G_ADD [[COPY4]], [[UMULH1]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s64) = G_ADD [[ADD7]], [[ICMP3]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[ADD8]](s64) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s64) = G_ADD [[COPY5]], [[UMULH2]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s64) = G_ADD [[ADD9]], [[ICMP3]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[ADD10]](s64) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s64) = G_ADD [[COPY6]], [[COPY2]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s64) = G_ADD [[ADD11]], [[ICMP3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY [[ADD12]](s64) ; CHECK-NEXT: $x10 = COPY [[MUL]](s64) ; CHECK-NEXT: $x11 = COPY [[COPY1]](s64) - ; CHECK-NEXT: $x12 = COPY [[COPY6]](s64) + ; CHECK-NEXT: $x12 = COPY [[COPY7]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11, implicit $x12 %lo1:_(s64) = COPY $x10 %mid1:_(s64) = COPY $x11 |