author    Petar Avramovic <Petar.Avramovic@amd.com>    2025-03-12 11:09:50 +0100
committer GitHub <noreply@github.com>                  2025-03-12 11:09:50 +0100
commit    3ad810ea9a97bb3399d8590372a6d3a0eb40d236 (patch)
tree      d36c2544ce107038a41c7461c56a6389d1989a7a
parent    9f617161aa8db76baa4426e3c461c9ca91ea8103 (diff)
AMDGPU/GlobalISel: Disable LCSSA pass (#124297)
Disable the LCSSA pass in preparation for implementing temporal divergence lowering in AMDGPU divergence lowering. This breaks all cases where SGPR or i1 values are used outside of a cycle with a divergent exit. Regenerate the regression tests for AMDGPU divergence lowering with LCSSA disabled, and update IntrinsicLaneMaskAnalyzer to stop tracking LCSSA phis that are lane masks.
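For readers unfamiliar with the form: LCSSA (loop-closed SSA) rewrites every value that is defined inside a loop and used outside of it so that each outside use goes through a single-entry phi placed in a loop exit block. The sketch below is a minimal, hand-written LLVM IR illustration of the situation this patch changes; it is not taken from the patch, and the function and value names are made up. With LCSSA disabled, the out-of-loop use reads the in-loop definition directly, which is the shape the regenerated tests below check and which the planned temporal divergence lowering is meant to handle.

define float @lcssa_sketch(float %val) {
entry:
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %i.next = add i32 %i, 1
  %f = uitofp i32 %i to float
  %cond = fcmp ogt float %f, %val        ; divergent exit condition
  br i1 %cond, label %exit, label %loop

exit:
  ; Without LCSSA, %cond (defined in %loop) is used here directly.
  ; With LCSSA, the pass would insert %cond.lcssa = phi i1 [ %cond, %loop ]
  ; in this block and rewrite the select below to use it.
  %sel = select i1 %cond, float 1.0, float 0.0
  ret float %sel
}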
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp                                          |  12
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h                                            |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp                                            |  10
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir | 314
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll          | 145
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir         | 725
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll                            | 121
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir                           | 789
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir                  | 237
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir                 |  28
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir                   |  26
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll                                  |   3
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir                                 |  38
13 files changed, 1352 insertions, 1098 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 0b18c6b..47e32c4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -91,25 +91,17 @@ void IntrinsicLaneMaskAnalyzer::initLaneMaskIntrinsics(MachineFunction &MF) {
GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI);
if (GI && GI->is(Intrinsic::amdgcn_if_break)) {
S32S64LaneMask.insert(MI.getOperand(3).getReg());
- findLCSSAPhi(MI.getOperand(0).getReg());
+ S32S64LaneMask.insert(MI.getOperand(0).getReg());
}
if (MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE) {
- findLCSSAPhi(MI.getOperand(0).getReg());
+ S32S64LaneMask.insert(MI.getOperand(0).getReg());
}
}
}
}
-void IntrinsicLaneMaskAnalyzer::findLCSSAPhi(Register Reg) {
- S32S64LaneMask.insert(Reg);
- for (const MachineInstr &LCSSAPhi : MRI.use_instructions(Reg)) {
- if (LCSSAPhi.isPHI())
- S32S64LaneMask.insert(LCSSAPhi.getOperand(0).getReg());
- }
-}
-
static LLT getReadAnyLaneSplitTy(LLT Ty) {
if (Ty.isVector()) {
LLT ElTy = Ty.getElementType();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
index 27f8fed..70cfdac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -47,8 +47,6 @@ public:
private:
void initLaneMaskIntrinsics(MachineFunction &MF);
- // This will not be needed when we turn off LCSSA for global-isel.
- void findLCSSAPhi(Register Reg);
};
void buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 4815f62..d5d375e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1383,7 +1383,11 @@ bool GCNPassConfig::addPreISel() {
// control flow modifications.
addPass(createAMDGPURewriteUndefForPHILegacyPass());
- addPass(createLCSSAPass());
+ // SDAG requires LCSSA, GlobalISel does not. Disable LCSSA for -global-isel
+ // with -new-reg-bank-select and without any of the fallback options.
+ if (!getCGPassBuilderOption().EnableGlobalISelOption ||
+ !isGlobalISelAbortEnabled() || !NewRegBankSelect)
+ addPass(createLCSSAPass());
if (TM->getOptLevel() > CodeGenOptLevel::Less)
addPass(&AMDGPUPerfHintAnalysisLegacyID);
@@ -2087,7 +2091,9 @@ void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const {
// control flow modifications.
addPass(AMDGPURewriteUndefForPHIPass());
- addPass(LCSSAPass());
+ if (!getCGPassBuilderOption().EnableGlobalISelOption ||
+ !isGlobalISelAbortEnabled() || !NewRegBankSelect)
+ addPass(LCSSAPass());
if (TM.getOptLevel() > CodeGenOptLevel::Less)
addPass(AMDGPUPerfHintAnalysisPass(TM));
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
index 6594d7f..6ce2f9b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
@@ -6,7 +6,7 @@
define void @divergent_i1_phi_uniform_branch_simple() {ret void}
define void @divergent_i1_phi_used_inside_loop() {ret void}
define void @divergent_i1_phi_used_inside_loop_bigger_loop_body() {ret void}
- define void @_amdgpu_cs_main() #0 {ret void}
+ define void @single_lane_execution_attribute() #0 {ret void}
attributes #0 = {"amdgpu-flat-work-group-size"="1,1"}
...
@@ -60,10 +60,10 @@ body: |
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.4:
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY7]](s1)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C5]], [[C4]]
- ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C4]]
+ ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
@@ -105,9 +105,9 @@ body: |
G_BR %bb.2
bb.4:
- %16:_(s32) = G_CONSTANT i32 2
- %17:_(s32) = G_CONSTANT i32 1
- %18:_(s32) = G_SELECT %13(s1), %17, %16
+ %16:_(s32) = G_SEXT %13(s1)
+ %17:_(s32) = G_CONSTANT i32 2
+ %18:_(s32) = G_ADD %16, %17
G_STORE %18(s32), %2(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
@@ -149,10 +149,10 @@ body: |
; GFX10-NEXT: bb.2:
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY7]](s1)
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C4]], [[C3]]
- ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]]
+ ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
@@ -178,9 +178,9 @@ body: |
bb.2:
%11:_(s1) = G_PHI %6(s1), %bb.0, %10(s1), %bb.1
- %12:_(s32) = G_CONSTANT i32 2
- %13:_(s32) = G_CONSTANT i32 1
- %14:_(s32) = G_SELECT %11(s1), %13, %12
+ %12:_(s32) = G_SEXT %11(s1)
+ %13:_(s32) = G_CONSTANT i32 2
+ %14:_(s32) = G_ADD %12, %13
G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
@@ -199,39 +199,30 @@ body: |
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -242,15 +233,15 @@ body: |
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32)
- %4:_(s1) = G_CONSTANT i1 true
- %5:_(s32) = G_CONSTANT i32 0
+ %4:_(s32) = G_CONSTANT i32 0
+ %5:_(s1) = G_CONSTANT i1 true
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
- %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1
- %10:_(s1) = G_PHI %4(s1), %bb.0, %11(s1), %bb.1
+ %6:_(s32) = G_PHI %7(s32), %bb.1, %4(s32), %bb.0
+ %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1
+ %10:_(s1) = G_PHI %5(s1), %bb.0, %11(s1), %bb.1
%12:_(s1) = G_CONSTANT i1 true
%11:_(s1) = G_XOR %10, %12
%13:_(s32) = G_UITOFP %8(s32)
@@ -262,13 +253,11 @@ body: |
G_BR %bb.2
bb.2:
- %16:_(s1) = G_PHI %11(s1), %bb.1
- %17:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
- %18:_(s32) = G_FCONSTANT float 0.000000e+00
- %19:_(s32) = G_FCONSTANT float 1.000000e+00
- %20:_(s32) = G_SELECT %16(s1), %19, %18
- G_STORE %20(s32), %3(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %16:_(s32) = G_FCONSTANT float 0.000000e+00
+ %17:_(s32) = G_FCONSTANT float 1.000000e+00
+ %18:_(s32) = G_SELECT %11(s1), %17, %16
+ G_STORE %18(s32), %3(p0) :: (store (s32))
SI_RETURN
...
@@ -293,33 +282,30 @@ body: |
; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %42(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %39(s1), %bb.5
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %37(s1), %bb.5
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %17(s32), %bb.5
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI3]](s32), [[C3]]
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI2]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C2]](s1), %bb.1
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C3]](s1), %bb.1
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI4]], [[C4]]
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C4]]
; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.5
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
@@ -333,36 +319,30 @@ body: |
; GFX10-NEXT: bb.4:
; GFX10-NEXT: successors: %bb.2(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
- ; GFX10-NEXT: G_STORE [[C7]](s32), [[MV2]](p0) :: (store (s32))
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
+ ; GFX10-NEXT: G_STORE [[C6]](s32), [[MV2]](p0) :: (store (s32))
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[COPY10]], [[C8]]
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
+ ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[COPY9]], [[C8]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C9]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C9]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY13]](s1), [[C11]], [[C10]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR1]](s1), [[C11]], [[C10]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -380,26 +360,26 @@ body: |
%8:_(s32) = COPY $vgpr6
%9:_(s32) = COPY $vgpr7
%10:_(p0) = G_MERGE_VALUES %8(s32), %9(s32)
- %11:_(s32) = G_CONSTANT i32 0
- %12:_(s32) = G_FCONSTANT float 1.000000e+00
- %13:_(s1) = G_FCMP floatpred(ogt), %1(s32), %12
+ %11:_(s32) = G_FCONSTANT float 1.000000e+00
+ %12:_(s1) = G_FCMP floatpred(ogt), %1(s32), %11
+ %13:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.4(0x40000000), %bb.2(0x40000000)
- %14:_(s32) = G_PHI %15(s32), %bb.5, %11(s32), %bb.0
- %16:_(s32) = G_PHI %11(s32), %bb.0, %17(s32), %bb.5
- %18:_(s1) = G_PHI %13(s1), %bb.0, %19(s1), %bb.5
- %20:_(s1) = G_CONSTANT i1 true
- %21:_(s32) = G_CONSTANT i32 1000
- %22:_(s1) = G_ICMP intpred(sle), %16(s32), %21
- G_BRCOND %22(s1), %bb.4
+ %14:_(s32) = G_PHI %15(s32), %bb.5, %13(s32), %bb.0
+ %16:_(s32) = G_PHI %13(s32), %bb.0, %17(s32), %bb.5
+ %18:_(s1) = G_PHI %12(s1), %bb.0, %19(s1), %bb.5
+ %20:_(s32) = G_CONSTANT i32 1000
+ %21:_(s1) = G_ICMP intpred(sle), %16(s32), %20
+ %22:_(s1) = G_CONSTANT i1 true
+ G_BRCOND %21(s1), %bb.4
G_BR %bb.2
bb.2:
successors: %bb.3(0x40000000), %bb.5(0x40000000)
- %23:_(s1) = G_PHI %24(s1), %bb.4, %20(s1), %bb.1
+ %23:_(s1) = G_PHI %24(s1), %bb.4, %22(s1), %bb.1
%25:_(s1) = G_CONSTANT i1 true
%26:_(s1) = G_XOR %23, %25
G_BRCOND %26(s1), %bb.5
@@ -415,9 +395,9 @@ body: |
bb.4:
successors: %bb.2(0x80000000)
- %24:_(s1) = G_CONSTANT i1 false
%28:_(s32) = G_CONSTANT i32 1000
G_STORE %28(s32), %10(p0) :: (store (s32))
+ %24:_(s1) = G_CONSTANT i1 false
G_BR %bb.2
bb.5:
@@ -434,22 +414,20 @@ body: |
G_BR %bb.6
bb.6:
- %33:_(s1) = G_PHI %19(s1), %bb.5
- %34:_(s32) = G_PHI %15(s32), %bb.5
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(s32)
- %35:_(s32) = G_FCONSTANT float 0.000000e+00
- %36:_(s32) = G_FCONSTANT float 1.000000e+00
- %37:_(s32) = G_SELECT %33(s1), %36, %35
- G_STORE %37(s32), %4(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
+ %33:_(s32) = G_FCONSTANT float 0.000000e+00
+ %34:_(s32) = G_FCONSTANT float 1.000000e+00
+ %35:_(s32) = G_SELECT %19(s1), %34, %33
+ G_STORE %35(s32), %4(p0) :: (store (s32))
SI_RETURN
...
---
-name: _amdgpu_cs_main
+name: single_lane_execution_attribute
legalized: true
tracksRegLiveness: true
body: |
- ; GFX10-LABEL: name: _amdgpu_cs_main
+ ; GFX10-LABEL: name: single_lane_execution_attribute
; GFX10: bb.0:
; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2
@@ -464,24 +442,22 @@ body: |
; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
; GFX10-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[ZEXT]]
; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR]](s64)
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[INTTOPTR]](p4) :: (load (<8 x s32>))
- ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
- ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s128) = G_TRUNC [[BITCAST]](s256)
- ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[TRUNC]](s128)
+ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[INTTOPTR]](p4) :: (invariant load (<8 x s32>), align 16, addrspace 4)
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), [[C2]](s32), [[C1]](s32)
- ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), [[C2]](s32), [[INT1]](s32)
- ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[INT2]]
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), [[C4]](s32), [[C1]](s32)
+ ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), [[C4]](s32), [[INT1]](s32)
+ ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[INT2]]
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FREEZE]], [[C3]](s32)
- ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[BITCAST1]](<4 x s32>), [[C1]](s32), [[SHL]], [[C1]], 0, 0, 0 :: (load (s32), align 1, addrspace 8)
+ ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[UV]](<4 x s32>), [[C1]](s32), [[SHL]], [[C1]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8)
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_BUFFER_LOAD]](s32), [[C1]]
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[C4]]
- ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[AND1]](s32)
+ ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[C2]]
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[AND1]](s32)
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC1]], [[C5]]
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C5]]
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.2
; GFX10-NEXT: G_BR %bb.1
@@ -495,8 +471,8 @@ body: |
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY3]](s1), %bb.0, %58(s1), %bb.4
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %30(s32), %bb.4, [[DEF]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY3]](s1), %bb.0, %56(s1), %bb.4
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %29(s32), %bb.4, [[DEF]](s32), %bb.0
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: G_BRCOND [[COPY4]](s1), %bb.5
; GFX10-NEXT: G_BR %bb.6
@@ -504,11 +480,11 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.3(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %34(s32), %bb.3, [[C6]](s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %36(s32), %bb.3, [[FREEZE]](s32), %bb.1
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %38(s32), %bb.3, [[C6]](s32), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %33(s32), %bb.3, [[C6]](s32), %bb.1
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %35(s32), %bb.3, [[FREEZE]](s32), %bb.1
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %37(s32), %bb.3, [[C6]](s32), %bb.1
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[BITCAST1]](<4 x s32>), [[C7]](s32), [[PHI2]], [[C7]], 0, 0, 0 :: (load (s32), align 1, addrspace 8)
+ ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[UV]](<4 x s32>), [[C7]](s32), [[PHI2]], [[C7]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8)
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AMDGPU_BUFFER_LOAD1]], [[PHI4]]
; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C8]]
@@ -521,11 +497,10 @@ body: |
; GFX10-NEXT: bb.4:
; GFX10-NEXT: successors: %bb.2(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.3
- ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PHI5]](s32), [[AMDGPU_BUFFER_LOAD]]
+ ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[AMDGPU_BUFFER_LOAD]]
; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP2]]
; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s1)
+ ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C10]](s1)
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
@@ -537,13 +512,13 @@ body: |
; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[C11]]
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI1]](s32), %bb.2, [[OR2]](s32), %bb.5
- ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[PHI1]](s32), %bb.2, [[OR2]](s32), %bb.5
+ ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY1]]
; GFX10-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ADD3]], [[C12]](s32)
; GFX10-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: G_AMDGPU_BUFFER_STORE [[PHI6]](s32), [[UV1]](<4 x s32>), [[C13]](s32), [[SHL1]], [[C13]], 0, 0, 0 :: (store (s32), align 1, addrspace 8)
+ ; GFX10-NEXT: G_AMDGPU_BUFFER_STORE [[PHI5]](s32), [[UV3]](<4 x s32>), [[C13]](s32), [[SHL1]], [[C13]], 0, 0, 0 :: (dereferenceable store (s32), align 1, addrspace 8)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
@@ -559,82 +534,79 @@ body: |
%7:_(s64) = G_ZEXT %0(s32)
%8:_(s64) = G_OR %6, %7
%9:_(p4) = G_INTTOPTR %8(s64)
- %10:_(<8 x s32>) = G_LOAD %9(p4) :: (load (<8 x s32>))
- %11:_(s256) = G_BITCAST %10(<8 x s32>)
- %12:_(s128) = G_TRUNC %11(s256)
- %13:_(<4 x s32>) = G_BITCAST %12(s128)
- %15:_(s32) = G_CONSTANT i32 0
- %14:_(s32) = G_CONSTANT i32 -1
- %16:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %14(s32), %15(s32)
- %17:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), %14(s32), %16(s32)
- %18:_(s32) = G_FREEZE %17
- %19:_(s32) = G_CONSTANT i32 2
- %20:_(s32) = G_SHL %18, %19(s32)
- %21:_(s32) = G_AMDGPU_BUFFER_LOAD %13(<4 x s32>), %15(s32), %20, %15, 0, 0, 0 :: (load (s32), align 1, addrspace 8)
- %22:_(s1) = G_ICMP intpred(eq), %21(s32), %15
- %23:_(s32) = G_CONSTANT i32 1
- %24:_(s32) = G_AND %18, %23
- %25:_(s1) = G_TRUNC %24(s32)
- %26:_(s1) = G_CONSTANT i1 true
- %27:_(s1) = G_XOR %25, %26
- G_BRCOND %27(s1), %bb.2
+ %10:_(<8 x s32>) = G_LOAD %9(p4) :: (invariant load (<8 x s32>), align 16, addrspace 4)
+ %11:_(s32) = G_CONSTANT i32 0
+ %12:_(s32) = G_CONSTANT i32 1
+ %13:_(s32) = G_CONSTANT i32 2
+ %14:_(<4 x s32>), %15:_(<4 x s32>) = G_UNMERGE_VALUES %10(<8 x s32>)
+ %16:_(s32) = G_CONSTANT i32 -1
+ %17:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %16(s32), %11(s32)
+ %18:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), %16(s32), %17(s32)
+ %19:_(s32) = G_FREEZE %18
+ %20:_(s32) = G_SHL %19, %13(s32)
+ %21:_(s32) = G_AMDGPU_BUFFER_LOAD %14(<4 x s32>), %11(s32), %20, %11, 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8)
+ %22:_(s1) = G_ICMP intpred(eq), %21(s32), %11
+ %23:_(s32) = G_AND %19, %12
+ %24:_(s1) = G_TRUNC %23(s32)
+ %25:_(s1) = G_CONSTANT i1 true
+ %26:_(s1) = G_XOR %24, %25
+ G_BRCOND %26(s1), %bb.2
G_BR %bb.1
bb.1:
successors: %bb.3(0x80000000)
- %28:_(s32) = G_CONSTANT i32 0
+ %27:_(s32) = G_CONSTANT i32 0
G_BR %bb.3
bb.2:
successors: %bb.5(0x40000000), %bb.6(0x40000000)
- %29:_(s32) = G_PHI %30(s32), %bb.4, %3(s32), %bb.0
- %31:_(s1) = G_PHI %32(s1), %bb.4, %26(s1), %bb.0
- G_BRCOND %31(s1), %bb.5
+ %28:_(s32) = G_PHI %29(s32), %bb.4, %3(s32), %bb.0
+ %30:_(s1) = G_PHI %31(s1), %bb.4, %25(s1), %bb.0
+ G_BRCOND %30(s1), %bb.5
G_BR %bb.6
bb.3:
successors: %bb.4(0x04000000), %bb.3(0x7c000000)
- %33:_(s32) = G_PHI %34(s32), %bb.3, %28(s32), %bb.1
- %35:_(s32) = G_PHI %36(s32), %bb.3, %18(s32), %bb.1
- %37:_(s32) = G_PHI %38(s32), %bb.3, %28(s32), %bb.1
- %39:_(s32) = G_CONSTANT i32 0
- %40:_(s32) = G_AMDGPU_BUFFER_LOAD %13(<4 x s32>), %39(s32), %33, %39, 0, 0, 0 :: (load (s32), align 1, addrspace 8)
- %38:_(s32) = G_ADD %40, %37
- %41:_(s32) = G_CONSTANT i32 -1
- %36:_(s32) = G_ADD %35, %41
- %42:_(s32) = G_CONSTANT i32 4
- %34:_(s32) = G_ADD %33, %42
- %43:_(s1) = G_ICMP intpred(ne), %36(s32), %39
- G_BRCOND %43(s1), %bb.3
+ %32:_(s32) = G_PHI %33(s32), %bb.3, %27(s32), %bb.1
+ %34:_(s32) = G_PHI %35(s32), %bb.3, %19(s32), %bb.1
+ %36:_(s32) = G_PHI %37(s32), %bb.3, %27(s32), %bb.1
+ %38:_(s32) = G_CONSTANT i32 0
+ %39:_(s32) = G_AMDGPU_BUFFER_LOAD %14(<4 x s32>), %38(s32), %32, %38, 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8)
+ %37:_(s32) = G_ADD %39, %36
+ %40:_(s32) = G_CONSTANT i32 -1
+ %35:_(s32) = G_ADD %34, %40
+ %41:_(s32) = G_CONSTANT i32 4
+ %33:_(s32) = G_ADD %32, %41
+ %42:_(s1) = G_ICMP intpred(ne), %35(s32), %38
+ G_BRCOND %42(s1), %bb.3
G_BR %bb.4
bb.4:
successors: %bb.2(0x80000000)
- %44:_(s32) = G_PHI %38(s32), %bb.3
- %32:_(s1) = G_CONSTANT i1 false
- %45:_(s1) = G_ICMP intpred(eq), %44(s32), %21
- %46:_(s1) = G_OR %22, %45
- %30:_(s32) = G_ZEXT %46(s1)
+ %43:_(s1) = G_ICMP intpred(eq), %37(s32), %21
+ %44:_(s1) = G_OR %22, %43
+ %29:_(s32) = G_ZEXT %44(s1)
+ %31:_(s1) = G_CONSTANT i1 false
G_BR %bb.2
bb.5:
successors: %bb.6(0x80000000)
- %47:_(s32) = G_ZEXT %22(s1)
- %48:_(s32) = G_CONSTANT i32 2
- %49:_(s32) = G_OR %47, %48
+ %45:_(s32) = G_ZEXT %22(s1)
+ %46:_(s32) = G_CONSTANT i32 2
+ %47:_(s32) = G_OR %45, %46
bb.6:
- %50:_(s32) = G_PHI %29(s32), %bb.2, %49(s32), %bb.5
- %51:_(<4 x s32>), %52:_(<4 x s32>) = G_UNMERGE_VALUES %10(<8 x s32>)
- %53:_(s32) = G_ADD %2, %1
- %54:_(s32) = G_CONSTANT i32 2
- %55:_(s32) = G_SHL %53, %54(s32)
- %56:_(s32) = G_CONSTANT i32 0
- G_AMDGPU_BUFFER_STORE %50(s32), %52(<4 x s32>), %56(s32), %55, %56, 0, 0, 0 :: (store (s32), align 1, addrspace 8)
+ %48:_(s32) = G_PHI %28(s32), %bb.2, %47(s32), %bb.5
+ %49:_(<4 x s32>), %50:_(<4 x s32>) = G_UNMERGE_VALUES %10(<8 x s32>)
+ %51:_(s32) = G_ADD %2, %1
+ %52:_(s32) = G_CONSTANT i32 2
+ %53:_(s32) = G_SHL %51, %52(s32)
+ %54:_(s32) = G_CONSTANT i32 0
+ G_AMDGPU_BUFFER_STORE %48(s32), %50(<4 x s32>), %54(s32), %53, %54, 0, 0, 0 :: (dereferenceable store (s32), align 1, addrspace 8)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
index a66c21f..687452f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
@@ -178,6 +178,55 @@ exit:
ret void
}
+define void @divergent_i1_xor_used_outside_loop_twice(float %val, float %pre.cond.val, ptr %addr, ptr %addr2) {
+; GFX10-LABEL: divergent_i1_xor_used_outside_loop_twice:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1
+; GFX10-NEXT: v_mov_b32_e32 v1, s4
+; GFX10-NEXT: ; implicit-def: $sgpr6
+; GFX10-NEXT: .LBB3_1: ; %loop
+; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-NEXT: v_cvt_f32_u32_e32 v6, v1
+; GFX10-NEXT: s_xor_b32 s5, s5, -1
+; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v6, v0
+; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4
+; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo
+; GFX10-NEXT: s_and_b32 s7, exec_lo, s5
+; GFX10-NEXT: s_or_b32 s6, s6, s7
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: s_cbranch_execnz .LBB3_1
+; GFX10-NEXT: ; %bb.2: ; %exit
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v1, -1.0, 2.0, s6
+; GFX10-NEXT: flat_store_dword v[2:3], v0
+; GFX10-NEXT: flat_store_dword v[4:5], v1
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %pre.cond = fcmp ogt float %pre.cond.val, 1.0
+ br label %loop
+
+loop:
+ %counter = phi i32 [ 0, %entry ], [ %counter.plus.1, %loop ]
+ %bool.counter = phi i1 [ %pre.cond, %entry ], [ %neg.bool.counter, %loop ]
+ %neg.bool.counter = xor i1 %bool.counter, true
+ %f.counter = uitofp i32 %counter to float
+ %cond = fcmp ogt float %f.counter, %val
+ %counter.plus.1 = add i32 %counter, 1
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ %select = select i1 %neg.bool.counter, float 1.000000e+00, float 0.000000e+00
+ store float %select, ptr %addr
+ %select2 = select i1 %neg.bool.counter, float 2.000000e+00, float -1.000000e+00
+ store float %select2, ptr %addr2
+ ret void
+}
+
;void xor(int num_elts, int* a, int* addr) {
;for(int i=0; i<num_elts; ++i) {
; if(a[i]==0)
@@ -195,15 +244,15 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
; GFX10-NEXT: s_mov_b32 s5, 0
; GFX10-NEXT: s_mov_b32 s6, -1
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB3_6
+; GFX10-NEXT: s_cbranch_execz .LBB4_6
; GFX10-NEXT: ; %bb.1: ; %loop.start.preheader
; GFX10-NEXT: v_mov_b32_e32 v5, s5
; GFX10-NEXT: ; implicit-def: $sgpr6
; GFX10-NEXT: ; implicit-def: $sgpr7
; GFX10-NEXT: ; implicit-def: $sgpr8
-; GFX10-NEXT: s_branch .LBB3_3
-; GFX10-NEXT: .LBB3_2: ; %Flow
-; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1
+; GFX10-NEXT: s_branch .LBB4_3
+; GFX10-NEXT: .LBB4_2: ; %Flow
+; GFX10-NEXT: ; in Loop: Header=BB4_3 Depth=1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9
; GFX10-NEXT: s_xor_b32 s9, s8, -1
; GFX10-NEXT: s_and_b32 s10, exec_lo, s7
@@ -212,8 +261,8 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
; GFX10-NEXT: s_and_b32 s9, exec_lo, s9
; GFX10-NEXT: s_or_b32 s6, s6, s9
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
-; GFX10-NEXT: s_cbranch_execz .LBB3_5
-; GFX10-NEXT: .LBB3_3: ; %loop.start
+; GFX10-NEXT: s_cbranch_execz .LBB4_5
+; GFX10-NEXT: .LBB4_3: ; %loop.start
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
; GFX10-NEXT: s_andn2_b32 s8, s8, exec_lo
@@ -228,9 +277,9 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
; GFX10-NEXT: s_and_saveexec_b32 s9, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB3_2
+; GFX10-NEXT: s_cbranch_execz .LBB4_2
; GFX10-NEXT: ; %bb.4: ; %loop.cond
-; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB4_3 Depth=1
; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v5
; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, v5, v0
; GFX10-NEXT: s_andn2_b32 s8, s8, exec_lo
@@ -240,20 +289,20 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
; GFX10-NEXT: s_and_b32 s11, exec_lo, vcc_lo
; GFX10-NEXT: s_or_b32 s8, s8, s10
; GFX10-NEXT: s_or_b32 s7, s7, s11
-; GFX10-NEXT: s_branch .LBB3_2
-; GFX10-NEXT: .LBB3_5: ; %loop.exit.guard
+; GFX10-NEXT: s_branch .LBB4_2
+; GFX10-NEXT: .LBB4_5: ; %loop.exit.guard
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX10-NEXT: s_andn2_b32 s5, -1, exec_lo
; GFX10-NEXT: s_and_b32 s6, exec_lo, s6
; GFX10-NEXT: s_or_b32 s6, s5, s6
-; GFX10-NEXT: .LBB3_6: ; %Flow1
+; GFX10-NEXT: .LBB4_6: ; %Flow1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_and_saveexec_b32 s4, s6
-; GFX10-NEXT: s_cbranch_execz .LBB3_8
+; GFX10-NEXT: s_cbranch_execz .LBB4_8
; GFX10-NEXT: ; %bb.7: ; %block.after.loop
; GFX10-NEXT: v_mov_b32_e32 v0, 5
; GFX10-NEXT: flat_store_dword v[3:4], v0
-; GFX10-NEXT: .LBB3_8: ; %exit
+; GFX10-NEXT: .LBB4_8: ; %exit
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
@@ -299,9 +348,9 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace
; GFX10-NEXT: s_mov_b32 s5, 0
; GFX10-NEXT: ; implicit-def: $sgpr6
; GFX10-NEXT: v_mov_b32_e32 v5, s5
-; GFX10-NEXT: s_branch .LBB4_2
-; GFX10-NEXT: .LBB4_1: ; %Flow
-; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
+; GFX10-NEXT: s_branch .LBB5_2
+; GFX10-NEXT: .LBB5_1: ; %Flow
+; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8
; GFX10-NEXT: s_and_b32 s4, exec_lo, s7
; GFX10-NEXT: s_or_b32 s5, s4, s5
@@ -309,43 +358,43 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace
; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo
; GFX10-NEXT: s_or_b32 s6, s4, s6
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
-; GFX10-NEXT: s_cbranch_execz .LBB4_6
-; GFX10-NEXT: .LBB4_2: ; %cond.block.0
+; GFX10-NEXT: s_cbranch_execz .LBB5_6
+; GFX10-NEXT: .LBB5_2: ; %cond.block.0
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v4, v5
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v4
; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB4_4
+; GFX10-NEXT: s_cbranch_execz .LBB5_4
; GFX10-NEXT: ; %bb.3: ; %if.block.0
-; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX10-NEXT: v_lshlrev_b64 v[8:9], 2, v[4:5]
; GFX10-NEXT: v_add_co_u32 v8, s4, v2, v8
; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s4, v3, v9, s4
; GFX10-NEXT: global_store_dword v[8:9], v4, off
-; GFX10-NEXT: .LBB4_4: ; %loop.break.block
-; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
+; GFX10-NEXT: .LBB5_4: ; %loop.break.block
+; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7
; GFX10-NEXT: v_cmp_ne_u32_e64 s4, v1, v4
; GFX10-NEXT: s_mov_b32 s7, -1
; GFX10-NEXT: ; implicit-def: $vgpr5
; GFX10-NEXT: s_and_saveexec_b32 s8, s4
-; GFX10-NEXT: s_cbranch_execz .LBB4_1
+; GFX10-NEXT: s_cbranch_execz .LBB5_1
; GFX10-NEXT: ; %bb.5: ; %loop.cond
-; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v4
; GFX10-NEXT: s_andn2_b32 s4, -1, exec_lo
; GFX10-NEXT: s_and_b32 s7, exec_lo, 0
; GFX10-NEXT: s_or_b32 s7, s4, s7
-; GFX10-NEXT: s_branch .LBB4_1
-; GFX10-NEXT: .LBB4_6: ; %cond.block.1
+; GFX10-NEXT: s_branch .LBB5_1
+; GFX10-NEXT: .LBB5_6: ; %cond.block.1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX10-NEXT: s_and_saveexec_b32 s4, s6
-; GFX10-NEXT: s_cbranch_execz .LBB4_8
+; GFX10-NEXT: s_cbranch_execz .LBB5_8
; GFX10-NEXT: ; %bb.7: ; %if.block.1
; GFX10-NEXT: global_store_dword v[6:7], v4, off
-; GFX10-NEXT: .LBB4_8: ; %exit
+; GFX10-NEXT: .LBB5_8: ; %exit
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -410,9 +459,9 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
; GFX10-NEXT: v_mov_b32_e32 v5, s0
; GFX10-NEXT: ; implicit-def: $sgpr1
; GFX10-NEXT: ; implicit-def: $sgpr2
-; GFX10-NEXT: s_branch .LBB5_2
-; GFX10-NEXT: .LBB5_1: ; %loop.cond
-; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
+; GFX10-NEXT: s_branch .LBB6_2
+; GFX10-NEXT: .LBB6_1: ; %loop.cond
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, v5, v0
; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v5
@@ -423,16 +472,16 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
; GFX10-NEXT: s_or_b32 s3, s3, s4
; GFX10-NEXT: s_or_b32 s1, s1, s4
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
-; GFX10-NEXT: s_cbranch_execz .LBB5_4
-; GFX10-NEXT: .LBB5_2: ; %loop.start
+; GFX10-NEXT: s_cbranch_execz .LBB6_4
+; GFX10-NEXT: .LBB6_2: ; %loop.start
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo
; GFX10-NEXT: s_and_b32 s4, exec_lo, s3
; GFX10-NEXT: s_or_b32 s2, s2, s4
; GFX10-NEXT: s_and_saveexec_b32 s4, s3
-; GFX10-NEXT: s_cbranch_execz .LBB5_1
+; GFX10-NEXT: s_cbranch_execz .LBB6_1
; GFX10-NEXT: ; %bb.3: ; %is.eq.zero
-; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo
; GFX10-NEXT: v_lshlrev_b64 v[6:7], 2, v[5:6]
@@ -444,8 +493,8 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
; GFX10-NEXT: s_and_b32 s3, exec_lo, vcc_lo
; GFX10-NEXT: s_or_b32 s2, s2, s3
; GFX10-NEXT: ; implicit-def: $sgpr3
-; GFX10-NEXT: s_branch .LBB5_1
-; GFX10-NEXT: .LBB5_4: ; %exit
+; GFX10-NEXT: s_branch .LBB6_1
+; GFX10-NEXT: .LBB6_4: ; %exit
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s1
; GFX10-NEXT: flat_store_dword v[3:4], v0
@@ -486,9 +535,9 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
; GFX10-NEXT: ; implicit-def: $sgpr2
; GFX10-NEXT: ; implicit-def: $sgpr3
; GFX10-NEXT: v_mov_b32_e32 v6, s0
-; GFX10-NEXT: s_branch .LBB6_2
-; GFX10-NEXT: .LBB6_1: ; %Flow
-; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: s_branch .LBB7_2
+; GFX10-NEXT: .LBB7_1: ; %Flow
+; GFX10-NEXT: ; in Loop: Header=BB7_2 Depth=1
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_and_b32 s4, exec_lo, s2
@@ -497,8 +546,8 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
; GFX10-NEXT: s_and_b32 s4, exec_lo, s3
; GFX10-NEXT: s_or_b32 s1, s1, s4
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
-; GFX10-NEXT: s_cbranch_execz .LBB6_4
-; GFX10-NEXT: .LBB6_2: ; %A
+; GFX10-NEXT: s_cbranch_execz .LBB7_4
+; GFX10-NEXT: .LBB7_2: ; %A
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo
@@ -513,9 +562,9 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB6_1
+; GFX10-NEXT: s_cbranch_execz .LBB7_1
; GFX10-NEXT: ; %bb.3: ; %loop.body
-; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB7_2 Depth=1
; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7
; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v6
@@ -531,16 +580,16 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v9
; GFX10-NEXT: global_store_dword v[7:8], v9, off
-; GFX10-NEXT: s_branch .LBB6_1
-; GFX10-NEXT: .LBB6_4: ; %loop.exit.guard
+; GFX10-NEXT: s_branch .LBB7_1
+; GFX10-NEXT: .LBB7_4: ; %loop.exit.guard
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-NEXT: s_and_saveexec_b32 s0, s1
; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX10-NEXT: s_cbranch_execz .LBB6_6
+; GFX10-NEXT: s_cbranch_execz .LBB7_6
; GFX10-NEXT: ; %bb.5: ; %break.body
; GFX10-NEXT: v_mov_b32_e32 v0, 10
; GFX10-NEXT: global_store_dword v[4:5], v0, off
-; GFX10-NEXT: .LBB6_6: ; %exit
+; GFX10-NEXT: .LBB7_6: ; %exit
; GFX10-NEXT: s_endpgm
entry:
br label %A
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir
index 5bbe3e4..397a952 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir
@@ -16,52 +16,43 @@ body: |
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %36(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %24(s1), %bb.1
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %22(s1), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %11(s32), %bb.1
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1)
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY7]], [[C2]]
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY6]], [[C2]]
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_2]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY11]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -73,16 +64,16 @@ body: |
%2:_(s32) = COPY $vgpr2
%3:_(s32) = COPY $vgpr3
%4:_(p0) = G_MERGE_VALUES %2(s32), %3(s32)
- %5:_(s32) = G_CONSTANT i32 0
- %6:_(s32) = G_FCONSTANT float 1.000000e+00
- %7:_(s1) = G_FCMP floatpred(ogt), %1(s32), %6
+ %5:_(s32) = G_FCONSTANT float 1.000000e+00
+ %6:_(s1) = G_FCMP floatpred(ogt), %1(s32), %5
+ %7:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %8:_(s32) = G_PHI %9(s32), %bb.1, %5(s32), %bb.0
- %10:_(s32) = G_PHI %5(s32), %bb.0, %11(s32), %bb.1
- %12:_(s1) = G_PHI %7(s1), %bb.0, %13(s1), %bb.1
+ %8:_(s32) = G_PHI %9(s32), %bb.1, %7(s32), %bb.0
+ %10:_(s32) = G_PHI %7(s32), %bb.0, %11(s32), %bb.1
+ %12:_(s1) = G_PHI %6(s1), %bb.0, %13(s1), %bb.1
%14:_(s1) = G_CONSTANT i1 true
%13:_(s1) = G_XOR %12, %14
%15:_(s32) = G_UITOFP %10(s32)
@@ -94,13 +85,11 @@ body: |
G_BR %bb.2
bb.2:
- %18:_(s1) = G_PHI %12(s1), %bb.1
- %19:_(s32) = G_PHI %9(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32)
- %20:_(s32) = G_FCONSTANT float 0.000000e+00
- %21:_(s32) = G_FCONSTANT float 1.000000e+00
- %22:_(s32) = G_SELECT %18(s1), %21, %20
- G_STORE %22(s32), %4(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(s32)
+ %18:_(s32) = G_FCONSTANT float 0.000000e+00
+ %19:_(s32) = G_FCONSTANT float 1.000000e+00
+ %20:_(s32) = G_SELECT %12(s1), %19, %18
+ G_STORE %20(s32), %4(p0) :: (store (s32))
SI_RETURN
...
@@ -120,9 +109,9 @@ body: |
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C1]](s1)
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C]](s1)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[DEF]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
@@ -133,9 +122,9 @@ body: |
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %41(s1), %bb.3
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %27(s1), %bb.3
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.3
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %40(s1), %bb.3
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %26(s1), %bb.3
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.3
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(p1) = G_PHI [[MV]](p1), %bb.0, %11(p1), %bb.3
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1)
@@ -175,15 +164,13 @@ body: |
; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1)
; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.1
; GFX10-NEXT: G_BR %bb.4
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.4:
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1)
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY15]](s1), [[C7]], [[C6]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C7]], [[C6]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -196,15 +183,15 @@ body: |
%3:_(s32) = COPY $vgpr3
%4:_(s32) = COPY $vgpr4
%5:_(p0) = G_MERGE_VALUES %3(s32), %4(s32)
- %6:_(s32) = G_CONSTANT i32 -1
- %7:_(s1) = G_CONSTANT i1 true
+ %6:_(s1) = G_CONSTANT i1 true
+ %7:_(s32) = G_CONSTANT i32 -1
bb.1:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
- %8:_(s32) = G_PHI %6(s32), %bb.0, %9(s32), %bb.3
+ %8:_(s32) = G_PHI %7(s32), %bb.0, %9(s32), %bb.3
%10:_(p1) = G_PHI %2(p1), %bb.0, %11(p1), %bb.3
- %12:sreg_32_xm0_xexec(s1) = G_PHI %7(s1), %bb.0, %13(s1), %bb.3
+ %12:sreg_32_xm0_xexec(s1) = G_PHI %6(s1), %bb.0, %13(s1), %bb.3
%14:sreg_32_xm0_xexec(s32) = SI_IF %12(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
@@ -230,11 +217,10 @@ body: |
G_BR %bb.4
bb.4:
- %22:_(s1) = G_PHI %12(s1), %bb.3
- %23:_(s32) = G_FCONSTANT float 0.000000e+00
- %24:_(s32) = G_FCONSTANT float 1.000000e+00
- %25:_(s32) = G_SELECT %22(s1), %24, %23
- G_STORE %25(s32), %5(p0) :: (store (s32))
+ %22:_(s32) = G_FCONSTANT float 0.000000e+00
+ %23:_(s32) = G_FCONSTANT float 1.000000e+00
+ %24:_(s32) = G_SELECT %12(s1), %23, %22
+ G_STORE %24(s32), %5(p0) :: (store (s32))
SI_RETURN
...
@@ -253,43 +239,34 @@ body: |
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %27(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, %24(s1), %bb.1
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, %22(s1), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %11(s32), %bb.1
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY6]], [[C2]]
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY5]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY9]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -301,16 +278,16 @@ body: |
%2:_(s32) = COPY $vgpr2
%3:_(s32) = COPY $vgpr3
%4:_(p0) = G_MERGE_VALUES %2(s32), %3(s32)
- %5:_(s32) = G_CONSTANT i32 0
- %6:_(s32) = G_FCONSTANT float 1.000000e+00
- %7:_(s1) = G_FCMP floatpred(ogt), %1(s32), %6
+ %5:_(s32) = G_FCONSTANT float 1.000000e+00
+ %6:_(s1) = G_FCMP floatpred(ogt), %1(s32), %5
+ %7:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %8:_(s32) = G_PHI %9(s32), %bb.1, %5(s32), %bb.0
- %10:_(s32) = G_PHI %5(s32), %bb.0, %11(s32), %bb.1
- %12:_(s1) = G_PHI %7(s1), %bb.0, %13(s1), %bb.1
+ %8:_(s32) = G_PHI %9(s32), %bb.1, %7(s32), %bb.0
+ %10:_(s32) = G_PHI %7(s32), %bb.0, %11(s32), %bb.1
+ %12:_(s1) = G_PHI %6(s1), %bb.0, %13(s1), %bb.1
%14:_(s1) = G_CONSTANT i1 true
%13:_(s1) = G_XOR %12, %14
%15:_(s32) = G_UITOFP %10(s32)
@@ -322,13 +299,109 @@ body: |
G_BR %bb.2
bb.2:
- %18:_(s1) = G_PHI %13(s1), %bb.1
- %19:_(s32) = G_PHI %9(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32)
- %20:_(s32) = G_FCONSTANT float 0.000000e+00
- %21:_(s32) = G_FCONSTANT float 1.000000e+00
- %22:_(s32) = G_SELECT %18(s1), %21, %20
- G_STORE %22(s32), %4(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(s32)
+ %18:_(s32) = G_FCONSTANT float 0.000000e+00
+ %19:_(s32) = G_FCONSTANT float 1.000000e+00
+ %20:_(s32) = G_SELECT %13(s1), %19, %18
+ G_STORE %20(s32), %4(p0) :: (store (s32))
+ SI_RETURN
+...
+
+
+---
+name: divergent_i1_xor_used_outside_loop_twice
+legalized: true
+tracksRegLiveness: true
+body: |
+ ; GFX10-LABEL: name: divergent_i1_xor_used_outside_loop_twice
+ ; GFX10: bb.0:
+ ; GFX10-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.1:
+ ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.0, %28(s1), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %14(s32), %bb.1
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY7]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_BR %bb.2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
+ ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C7]], [[C6]]
+ ; GFX10-NEXT: G_STORE [[SELECT1]](s32), [[MV1]](p0) :: (store (s32))
+ ; GFX10-NEXT: SI_RETURN
+ bb.0:
+ successors: %bb.1(0x80000000)
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = COPY $vgpr3
+ %4:_(p0) = G_MERGE_VALUES %2(s32), %3(s32)
+ %5:_(s32) = COPY $vgpr4
+ %6:_(s32) = COPY $vgpr5
+ %7:_(p0) = G_MERGE_VALUES %5(s32), %6(s32)
+ %8:_(s32) = G_FCONSTANT float 1.000000e+00
+ %9:_(s1) = G_FCMP floatpred(ogt), %1(s32), %8
+ %10:_(s32) = G_CONSTANT i32 0
+
+ bb.1:
+ successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+
+ %11:_(s32) = G_PHI %12(s32), %bb.1, %10(s32), %bb.0
+ %13:_(s32) = G_PHI %10(s32), %bb.0, %14(s32), %bb.1
+ %15:_(s1) = G_PHI %9(s1), %bb.0, %16(s1), %bb.1
+ %17:_(s1) = G_CONSTANT i1 true
+ %16:_(s1) = G_XOR %15, %17
+ %18:_(s32) = G_UITOFP %13(s32)
+ %19:_(s1) = G_FCMP floatpred(ogt), %18(s32), %0
+ %20:_(s32) = G_CONSTANT i32 1
+ %14:_(s32) = G_ADD %13, %20
+ %12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %19(s1), %11(s32)
+ SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.2
+
+ bb.2:
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %12(s32)
+ %21:_(s32) = G_FCONSTANT float 0.000000e+00
+ %22:_(s32) = G_FCONSTANT float 1.000000e+00
+ %23:_(s32) = G_SELECT %16(s1), %22, %21
+ G_STORE %23(s32), %4(p0) :: (store (s32))
+ %24:_(s32) = G_FCONSTANT float -1.000000e+00
+ %25:_(s32) = G_FCONSTANT float 2.000000e+00
+ %26:_(s32) = G_SELECT %16(s1), %25, %24
+ G_STORE %26(s32), %7(p0) :: (store (s32))
SI_RETURN
...
@@ -364,13 +437,12 @@ body: |
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %40(s1), %bb.8
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %38(s1), %bb.8
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -379,49 +451,47 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF3]](s1), %bb.1, %73(s1), %bb.7
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.1, %62(s1), %bb.7
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.1, %49(s1), %bb.7
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.1, %60(s1), %bb.7
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.1, %47(s1), %bb.7
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI5]](s32)
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C3]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C5]]
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C4]]
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.4
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.4:
; GFX10-NEXT: successors: %bb.7(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1)
- ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI5]], [[C7]]
- ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI5]](s32), [[COPY]]
- ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C6]]
+ ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI4]](s32), [[COPY]]
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY14]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.7
; GFX10-NEXT: {{ $}}
@@ -438,32 +508,26 @@ body: |
; GFX10-NEXT: bb.7:
; GFX10-NEXT: successors: %bb.8(0x04000000), %bb.3(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.3, [[S_OR_B32_3]](s1), %bb.4
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.3, [[S_OR_B32_2]](s1), %bb.4
- ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.3, [[S_OR_B32_3]](s1), %bb.4
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.3, [[S_OR_B32_2]](s1), %bb.4
+ ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1)
- ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32)
; GFX10-NEXT: [[C9:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY18]], [[C9]]
- ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY17]](s1), [[PHI4]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY19]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY17]], [[C9]]
+ ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY16]](s1), [[PHI3]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.8
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.8:
; GFX10-NEXT: successors: %bb.2(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.7
- ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1)
- ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY20]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY21]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_5]](s1), [[S_AND_B32_5]](s1), implicit-def $scc
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.2
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
@@ -502,24 +566,24 @@ body: |
%16:_(s32) = G_PHI %12(s32), %bb.1, %17(s32), %bb.7
%18:_(s32) = G_PHI %19(s32), %bb.7, %12(s32), %bb.1
- %20:_(s1) = G_CONSTANT i1 true
- %21:_(s64) = G_SEXT %18(s32)
- %22:_(s32) = G_CONSTANT i32 2
- %23:_(s64) = G_SHL %21, %22(s32)
- %24:_(p1) = G_PTR_ADD %3, %23(s64)
- %25:_(s32) = G_LOAD %24(p1) :: (load (s32), addrspace 1)
- %26:_(s32) = G_CONSTANT i32 0
- %27:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %25(s32), %26
- %28:sreg_32_xm0_xexec(s32) = SI_IF %27(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
+ %20:_(s64) = G_SEXT %18(s32)
+ %21:_(s32) = G_CONSTANT i32 2
+ %22:_(s64) = G_SHL %20, %21(s32)
+ %23:_(p1) = G_PTR_ADD %3, %22(s64)
+ %24:_(s32) = G_LOAD %23(p1) :: (load (s32), addrspace 1)
+ %25:_(s32) = G_CONSTANT i32 0
+ %26:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %24(s32), %25
+ %27:_(s1) = G_CONSTANT i1 true
+ %28:sreg_32_xm0_xexec(s32) = SI_IF %26(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
bb.4:
successors: %bb.7(0x80000000)
- %29:_(s1) = G_CONSTANT i1 false
- %30:_(s32) = G_CONSTANT i32 1
- %31:_(s32) = G_ADD %18, %30
- %32:_(s1) = G_ICMP intpred(slt), %18(s32), %0
+ %29:_(s32) = G_CONSTANT i32 1
+ %30:_(s32) = G_ADD %18, %29
+ %31:_(s1) = G_ICMP intpred(slt), %18(s32), %0
+ %32:_(s1) = G_CONSTANT i1 false
G_BR %bb.7
bb.5:
@@ -535,12 +599,12 @@ body: |
bb.7:
successors: %bb.8(0x04000000), %bb.3(0x7c000000)
- %19:_(s32) = G_PHI %31(s32), %bb.4, %7(s32), %bb.3
- %34:_(s1) = G_PHI %29(s1), %bb.4, %20(s1), %bb.3
- %35:_(s1) = G_PHI %32(s1), %bb.4, %20(s1), %bb.3
+ %19:_(s32) = G_PHI %30(s32), %bb.4, %7(s32), %bb.3
+ %34:_(s1) = G_PHI %32(s1), %bb.4, %27(s1), %bb.3
+ %35:_(s1) = G_PHI %31(s1), %bb.4, %27(s1), %bb.3
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %28(s32)
%36:_(s1) = G_CONSTANT i1 true
- %37:_(s1) = G_XOR %34, %36
+ %14:_(s1) = G_XOR %34, %36
%17:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %35(s1), %16(s32)
SI_LOOP %17(s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.8
@@ -548,9 +612,7 @@ body: |
bb.8:
successors: %bb.2(0x80000000)
- %14:_(s1) = G_PHI %37(s1), %bb.7
- %38:_(s32) = G_PHI %17(s32), %bb.7
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %38(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
G_BR %bb.2
...
@@ -572,85 +634,75 @@ body: |
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %39(s1), %bb.6
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PHI2]]
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP]](s1)
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PHI1]]
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.4(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32)
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64)
- ; GFX10-NEXT: G_STORE [[PHI2]](s32), [[PTR_ADD]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: G_STORE [[PHI1]](s32), [[PTR_ADD]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.4:
; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI2]]
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1)
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[COPY8]](s1)
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI1]]
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1)
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[COPY6]](s1)
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.6(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C4]]
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
; GFX10-NEXT: successors: %bb.7(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.4, [[S_OR_B32_]](s1), %bb.5
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.4, [[S_OR_B32_]](s1), %bb.5
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY9]](s1), [[PHI]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.7
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.7:
; GFX10-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.6
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI2]](s32), %bb.6
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_1]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
- ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY12]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.8
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.8:
; GFX10-NEXT: successors: %bb.9(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: G_STORE [[PHI6]](s32), [[MV1]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: G_STORE [[PHI1]](s32), [[MV1]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.9:
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32)
@@ -667,14 +719,14 @@ body: |
%5:_(s32) = COPY $vgpr6
%6:_(s32) = COPY $vgpr7
%7:_(p1) = G_MERGE_VALUES %5(s32), %6(s32)
- %8:_(s32) = G_CONSTANT i32 0
- %9:_(s32) = G_IMPLICIT_DEF
+ %8:_(s32) = G_IMPLICIT_DEF
+ %9:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x80000000)
- %10:_(s32) = G_PHI %11(s32), %bb.6, %8(s32), %bb.0
- %12:_(s32) = G_PHI %8(s32), %bb.0, %13(s32), %bb.6
+ %10:_(s32) = G_PHI %11(s32), %bb.6, %9(s32), %bb.0
+ %12:_(s32) = G_PHI %9(s32), %bb.0, %13(s32), %bb.6
bb.2:
successors: %bb.3(0x40000000), %bb.4(0x40000000)
@@ -695,24 +747,24 @@ body: |
bb.4:
successors: %bb.5(0x40000000), %bb.6(0x40000000)
- %20:_(s1) = G_CONSTANT i1 true
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
- %21:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %1(s32), %12
- %22:sreg_32_xm0_xexec(s32) = SI_IF %21(s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
+ %20:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %1(s32), %12
+ %21:_(s1) = G_CONSTANT i1 true
+ %22:sreg_32_xm0_xexec(s32) = SI_IF %20(s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.5
bb.5:
successors: %bb.6(0x80000000)
- %23:_(s1) = G_CONSTANT i1 false
- %24:_(s32) = G_CONSTANT i32 1
- %25:_(s32) = G_ADD %12, %24
+ %23:_(s32) = G_CONSTANT i32 1
+ %24:_(s32) = G_ADD %12, %23
+ %25:_(s1) = G_CONSTANT i1 false
bb.6:
successors: %bb.7(0x04000000), %bb.1(0x7c000000)
- %13:_(s32) = G_PHI %25(s32), %bb.5, %9(s32), %bb.4
- %26:_(s1) = G_PHI %23(s1), %bb.5, %20(s1), %bb.4
+ %13:_(s32) = G_PHI %24(s32), %bb.5, %8(s32), %bb.4
+ %26:_(s1) = G_PHI %25(s1), %bb.5, %21(s1), %bb.4
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %22(s32)
%11:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %26(s1), %10(s32)
SI_LOOP %11(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -721,20 +773,17 @@ body: |
bb.7:
successors: %bb.8(0x40000000), %bb.9(0x40000000)
- %27:_(s32) = G_PHI %11(s32), %bb.6
- %28:sreg_32_xm0_xexec(s1) = G_PHI %14(s1), %bb.6
- %29:_(s32) = G_PHI %12(s32), %bb.6
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32)
- %30:sreg_32_xm0_xexec(s32) = SI_IF %28(s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %11(s32)
+ %27:sreg_32_xm0_xexec(s32) = SI_IF %14(s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.8
bb.8:
successors: %bb.9(0x80000000)
- G_STORE %29(s32), %7(p1) :: (store (s32), addrspace 1)
+ G_STORE %12(s32), %7(p1) :: (store (s32), addrspace 1)
bb.9:
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %30(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32)
SI_RETURN
...
@@ -755,78 +804,69 @@ body: |
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C]](s1)
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %54(s1), %bb.3
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %43(s1), %bb.3
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %33(s1), %bb.3
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %12(s32), %bb.3
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %41(s1), %bb.3
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %31(s1), %bb.3
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %12(s32), %bb.3
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1)
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[COPY8]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1)
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY8]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.3(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]]
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
- ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[PHI2]](s1), %bb.1, [[DEF2]](s1), %bb.2
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[PHI1]](s1), %bb.1, [[DEF1]](s1), %bb.2
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1)
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
- ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[COPY12]]
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[FREEZE]](s1)
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[FREEZE]](s1)
+ ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[COPY11]]
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[FREEZE]](s1)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C4]]
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI4]](s32), [[COPY]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI3]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C4]]
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI3]](s32), [[COPY]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI2]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.4
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.4:
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3
- ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY16]](s1), [[C6]], [[C5]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FREEZE]](s1), [[C6]], [[C5]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32))
; GFX10-NEXT: S_ENDPGM 0
bb.0:
@@ -840,15 +880,15 @@ body: |
%4:_(s32) = COPY $vgpr3
%5:_(s32) = COPY $vgpr4
%6:_(p0) = G_MERGE_VALUES %4(s32), %5(s32)
- %7:_(s32) = G_CONSTANT i32 0
- %8:_(s1) = G_CONSTANT i1 true
+ %7:_(s1) = G_CONSTANT i1 true
+ %8:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
- %9:_(s32) = G_PHI %10(s32), %bb.3, %7(s32), %bb.0
- %11:_(s32) = G_PHI %7(s32), %bb.0, %12(s32), %bb.3
- %13:sreg_32_xm0_xexec(s1) = G_PHI %8(s1), %bb.0, %14(s1), %bb.3
+ %9:_(s32) = G_PHI %10(s32), %bb.3, %8(s32), %bb.0
+ %11:_(s32) = G_PHI %8(s32), %bb.0, %12(s32), %bb.3
+ %13:sreg_32_xm0_xexec(s1) = G_PHI %7(s1), %bb.0, %14(s1), %bb.3
%15:sreg_32_xm0_xexec(s32) = SI_IF %13(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
@@ -877,13 +917,11 @@ body: |
G_BR %bb.4
bb.4:
- %26:_(s1) = G_PHI %14(s1), %bb.3
- %27:_(s32) = G_PHI %10(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32)
- %28:_(s32) = G_FCONSTANT float 0.000000e+00
- %29:_(s32) = G_FCONSTANT float 1.000000e+00
- %30:_(s32) = G_SELECT %26(s1), %29, %28
- G_STORE %30(s32), %6(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %10(s32)
+ %26:_(s32) = G_FCONSTANT float 0.000000e+00
+ %27:_(s32) = G_FCONSTANT float 1.000000e+00
+ %28:_(s32) = G_SELECT %14(s1), %27, %26
+ G_STORE %28(s32), %6(p0) :: (store (s32))
S_ENDPGM 0
...
@@ -906,41 +944,38 @@ body: |
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF3]](s1), %bb.0, %67(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %56(s1), %bb.5
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %43(s1), %bb.5
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %54(s1), %bb.5
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %41(s1), %bb.5
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
@@ -954,24 +989,24 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.5(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32)
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]]
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C6]]
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
- ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C7]]
- ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C8]]
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C6]]
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[C7]]
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
+ ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C8]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
@@ -982,27 +1017,21 @@ body: |
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
- ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1)
- ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY16]](s1)
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1)
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY14]](s1), [[PHI2]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5
- ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_4]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32)
- ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY15]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
bb.0:
successors: %bb.1(0x80000000)
@@ -1017,23 +1046,23 @@ body: |
%6:_(s32) = COPY $vgpr4
%7:_(s32) = COPY $vgpr5
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
- %9:_(s32) = G_CONSTANT i32 0
- %10:_(s32) = G_IMPLICIT_DEF
+ %9:_(s32) = G_IMPLICIT_DEF
+ %10:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.3(0x40000000), %bb.5(0x40000000)
- %11:_(s32) = G_PHI %12(s32), %bb.5, %9(s32), %bb.0
- %13:_(s32) = G_PHI %9(s32), %bb.0, %14(s32), %bb.5
- %15:_(s1) = G_CONSTANT i1 true
- %16:_(s64) = G_SEXT %13(s32)
- %17:_(s32) = G_CONSTANT i32 2
- %18:_(s64) = G_SHL %16, %17(s32)
- %19:_(p1) = G_PTR_ADD %5, %18(s64)
- %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1)
- %21:_(s32) = G_CONSTANT i32 0
- %22:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %20(s32), %21
- %23:sreg_32_xm0_xexec(s32) = SI_IF %22(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
+ %11:_(s32) = G_PHI %12(s32), %bb.5, %10(s32), %bb.0
+ %13:_(s32) = G_PHI %10(s32), %bb.0, %14(s32), %bb.5
+ %15:_(s64) = G_SEXT %13(s32)
+ %16:_(s32) = G_CONSTANT i32 2
+ %17:_(s64) = G_SHL %15, %16(s32)
+ %18:_(p1) = G_PTR_ADD %5, %17(s64)
+ %19:_(s32) = G_LOAD %18(p1) :: (load (s32), addrspace 1)
+ %20:_(s32) = G_CONSTANT i32 0
+ %21:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %19(s32), %20
+ %22:_(s1) = G_CONSTANT i1 true
+ %23:sreg_32_xm0_xexec(s32) = SI_IF %21(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.3
bb.2:
@@ -1046,17 +1075,17 @@ body: |
bb.3:
successors: %bb.5(0x80000000)
- %25:_(s1) = G_CONSTANT i1 false
- %26:_(s32) = G_CONSTANT i32 2
- %27:_(s64) = G_SHL %16, %26(s32)
- %28:_(p1) = G_PTR_ADD %2, %27(s64)
- %29:_(s32) = G_LOAD %28(p1) :: (load (s32), addrspace 1)
- %30:_(s32) = G_CONSTANT i32 1
- %31:_(s32) = G_ADD %29, %30
- G_STORE %31(s32), %28(p1) :: (store (s32), addrspace 1)
- %32:_(s32) = G_ADD %13, %30
- %33:_(s32) = G_CONSTANT i32 100
- %34:_(s1) = G_ICMP intpred(ult), %13(s32), %33
+ %25:_(s32) = G_CONSTANT i32 2
+ %26:_(s64) = G_SHL %15, %25(s32)
+ %27:_(p1) = G_PTR_ADD %2, %26(s64)
+ %28:_(s32) = G_LOAD %27(p1) :: (load (s32), addrspace 1)
+ %29:_(s32) = G_CONSTANT i32 1
+ %30:_(s32) = G_ADD %28, %29
+ G_STORE %30(s32), %27(p1) :: (store (s32), addrspace 1)
+ %31:_(s32) = G_ADD %13, %29
+ %32:_(s32) = G_CONSTANT i32 100
+ %33:_(s1) = G_ICMP intpred(ult), %13(s32), %32
+ %34:_(s1) = G_CONSTANT i1 false
G_BR %bb.5
bb.4:
@@ -1066,9 +1095,9 @@ body: |
bb.5:
successors: %bb.6(0x04000000), %bb.1(0x7c000000)
- %14:_(s32) = G_PHI %32(s32), %bb.3, %10(s32), %bb.1
- %36:_(s1) = G_PHI %25(s1), %bb.3, %15(s1), %bb.1
- %37:_(s1) = G_PHI %34(s1), %bb.3, %15(s1), %bb.1
+ %14:_(s32) = G_PHI %31(s32), %bb.3, %9(s32), %bb.1
+ %36:sreg_32_xm0_xexec(s1) = G_PHI %34(s1), %bb.3, %22(s1), %bb.1
+ %37:_(s1) = G_PHI %33(s1), %bb.3, %22(s1), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(s32)
%12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %37(s1), %11(s32)
SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -1077,9 +1106,7 @@ body: |
bb.6:
successors: %bb.2(0x40000000), %bb.4(0x40000000)
- %38:sreg_32_xm0_xexec(s1) = G_PHI %36(s1), %bb.5
- %39:_(s32) = G_PHI %12(s32), %bb.5
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %39(s32)
- %35:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %12(s32)
+ %35:sreg_32_xm0_xexec(s32) = SI_IF %36(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
index 1698f84..ba30a4b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
@@ -472,8 +472,7 @@ exit:
}
; Snippet from a test generated by the GraphicsFuzz tool; the frontend generates IR
-; with irreducible control flow graph. FixIrreducible converts it into natural
-; loop and in the process creates i1 phi with three incoming values.
+; with an irreducible control flow graph.
; int loop(int x, int y, int a0, int a1, int a2, int a3, int a4) {
; do {
@@ -488,27 +487,99 @@ exit:
; return a0;
; }
-; This test is also interesting because it has phi with three incomings
-;define amdgpu_ps i32 @irreducible_cfg(i32 %x, i32 %y, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
-;.entry:
-; %.y_lt_a2 = icmp sgt i32 %a2, %y
-; %.x_lt_a2 = icmp sgt i32 %a2, %x
-; %.x_lt_a3 = icmp sgt i32 %a3, %x
-; br i1 %.y_lt_a2, label %.preheader, label %.loopexit ; first iteration, jump to inner loop if 'y < a2' or start with 'if (x < a3)'
-;
-;.preheader: ; if (y < a2),
-; br label %.inner_loop
-;
-;.inner_loop: ; do while x < a2
-; br i1 %.x_lt_a2, label %.inner_loop, label %.loopexit
-;
-;.loopexit: ; if x < a3
-; %not.inner_loop = xor i1 %.y_lt_a2, true
-; %brmerge = select i1 %.x_lt_a3, i1 true, i1 %not.inner_loop ; exit loop if 'x < a3' or 'loop ends since !(y < a2)'
-; %.ret = select i1 %.x_lt_a3, i32 %a1, i32 %a0 ; select retrun value a1 'x < a3' or a0 'loop ends'
-; br i1 %brmerge, label %.exit, label %.preheader
-;
-;.exit:
-; ret i32 %.ret
-;}
+; This test is also interesting because it used to have a phi with three incoming values.
+; After fa4cc9ddd58eb9fef2497e678873ff3b495340a3, FixIrreducible no longer
+; generates a phi with three incoming values. There is a MIR test with such a phi.
+define amdgpu_ps i32 @irreducible_cfg(i32 %x, i32 %y, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; GFX10-LABEL: irreducible_cfg:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, v4, v1
+; GFX10-NEXT: s_and_b32 s0, 1, s0
+; GFX10-NEXT: ; implicit-def: $sgpr2
+; GFX10-NEXT: v_cmp_ne_u32_e64 s3, 0, s0
+; GFX10-NEXT: s_mov_b32 s0, 0
+; GFX10-NEXT: s_xor_b32 s1, vcc_lo, -1
+; GFX10-NEXT: s_mov_b32 s4, s1
+; GFX10-NEXT: s_branch .LBB6_2
+; GFX10-NEXT: .LBB6_1: ; %Flow2
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
+; GFX10-NEXT: s_and_b32 s5, exec_lo, s7
+; GFX10-NEXT: s_or_b32 s0, s5, s0
+; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo
+; GFX10-NEXT: s_and_b32 s5, exec_lo, s3
+; GFX10-NEXT: s_or_b32 s2, s2, s5
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
+; GFX10-NEXT: s_cbranch_execz .LBB6_8
+; GFX10-NEXT: .LBB6_2: ; %irr.guard
+; GFX10-NEXT: ; =>This Loop Header: Depth=1
+; GFX10-NEXT: ; Child Loop BB6_6 Depth 2
+; GFX10-NEXT: s_andn2_b32 s5, s3, exec_lo
+; GFX10-NEXT: s_and_b32 s3, exec_lo, s3
+; GFX10-NEXT: s_mov_b32 s6, -1
+; GFX10-NEXT: s_or_b32 s3, s5, s3
+; GFX10-NEXT: s_and_saveexec_b32 s5, s4
+; GFX10-NEXT: s_xor_b32 s4, exec_lo, s5
+; GFX10-NEXT: ; %bb.3: ; %.loopexit
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, v5, v0
+; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo
+; GFX10-NEXT: s_andn2_b32 s7, -1, exec_lo
+; GFX10-NEXT: s_or_b32 s5, vcc_lo, s1
+; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo
+; GFX10-NEXT: s_xor_b32 s5, s5, -1
+; GFX10-NEXT: s_or_b32 s3, s3, s6
+; GFX10-NEXT: s_and_b32 s5, exec_lo, s5
+; GFX10-NEXT: s_or_b32 s6, s7, s5
+; GFX10-NEXT: ; %bb.4: ; %Flow1
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: s_mov_b32 s4, -1
+; GFX10-NEXT: s_mov_b32 s7, -1
+; GFX10-NEXT: s_and_saveexec_b32 s5, s6
+; GFX10-NEXT: s_cbranch_execz .LBB6_1
+; GFX10-NEXT: ; %bb.5: ; %.preheader
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: s_mov_b32 s6, 0
+; GFX10-NEXT: v_cmp_le_i32_e32 vcc_lo, v4, v0
+; GFX10-NEXT: .LBB6_6: ; %.inner_loop
+; GFX10-NEXT: ; Parent Loop BB6_2 Depth=1
+; GFX10-NEXT: ; => This Inner Loop Header: Depth=2
+; GFX10-NEXT: s_and_b32 s7, exec_lo, vcc_lo
+; GFX10-NEXT: s_or_b32 s6, s7, s6
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s6
+; GFX10-NEXT: s_cbranch_execnz .LBB6_6
+; GFX10-NEXT: ; %bb.7: ; %Flow
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s6
+; GFX10-NEXT: s_andn2_b32 s6, -1, exec_lo
+; GFX10-NEXT: s_and_b32 s7, exec_lo, 0
+; GFX10-NEXT: s_or_b32 s7, s6, s7
+; GFX10-NEXT: s_branch .LBB6_1
+; GFX10-NEXT: .LBB6_8: ; %.exit
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, v3, s2
+; GFX10-NEXT: v_readfirstlane_b32 s0, v0
+; GFX10-NEXT: ; return to shader part epilog
+.entry:
+ %.y_lt_a2 = icmp sgt i32 %a2, %y
+ %.x_lt_a2 = icmp sgt i32 %a2, %x
+ %.x_lt_a3 = icmp sgt i32 %a3, %x
+ br i1 %.y_lt_a2, label %.preheader, label %.loopexit ; first iteration, jump to inner loop if 'y < a2' or start with 'if (x < a3)'
+
+.preheader: ; if (y < a2),
+ br label %.inner_loop
+
+.inner_loop: ; do while x < a2
+ br i1 %.x_lt_a2, label %.inner_loop, label %.loopexit
+
+.loopexit: ; if x < a3
+ %not.inner_loop = xor i1 %.y_lt_a2, true
+ %brmerge = select i1 %.x_lt_a3, i1 true, i1 %not.inner_loop ; exit loop if 'x < a3' or 'loop ends since !(y < a2)'
+  %.ret = select i1 %.x_lt_a3, i32 %a1, i32 %a0 ; select return value: a1 if 'x < a3', a0 if 'loop ends'
+ br i1 %brmerge, label %.exit, label %.preheader
+
+.exit:
+ ret i32 %.ret
+}
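A phi with three incoming values, as mentioned in the comment above the irreducible_cfg test, has the following shape in LLVM IR (an illustrative sketch only; the function, block, and value names here are hypothetical and are not taken from the tests):

  define i32 @three_incoming_phi(i1 %c0, i1 %c1) {
  entry:
    br i1 %c0, label %a, label %exit
  a:
    br i1 %c1, label %b, label %exit
  b:
    br label %exit
  exit:
    ; one incoming value per predecessor: %entry, %a, and %b
    %r = phi i32 [ 0, %entry ], [ 1, %a ], [ 2, %b ]
    ret i32 %r
  }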
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir
index 39ebf66..bd7140d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir
@@ -39,10 +39,10 @@ body: |
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY7]](s1)
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C4]], [[C3]]
- ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]]
+ ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
@@ -69,9 +69,9 @@ body: |
bb.2:
%12:_(s1) = G_PHI %6(s1), %bb.0, %11(s1), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(s32)
- %13:_(s32) = G_CONSTANT i32 2
- %14:_(s32) = G_CONSTANT i32 1
- %15:_(s32) = G_SELECT %12(s1), %14, %13
+ %13:_(s32) = G_SEXT %12(s1)
+ %14:_(s32) = G_CONSTANT i32 2
+ %15:_(s32) = G_ADD %13, %14
G_STORE %15(s32), %2(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
@@ -135,10 +135,10 @@ body: |
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY7]](s1), %bb.1, [[S_OR_B32_]](s1), %bb.2
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_ELSE]](s32)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY11]](s1), [[C3]], [[C4]]
- ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY11]](s1)
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]]
+ ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.3(0x40000000), %bb.1(0x40000000)
@@ -179,9 +179,9 @@ body: |
bb.4:
%15:_(s1) = G_PHI %9(s1), %bb.1, %13(s1), %bb.2
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %11(s32)
- %16:_(s32) = G_CONSTANT i32 1
+ %16:_(s32) = G_SEXT %15(s1)
%17:_(s32) = G_CONSTANT i32 2
- %18:_(s32) = G_SELECT %15(s1), %16, %17
+ %18:_(s32) = G_ADD %16, %17
G_STORE %18(s32), %2(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
@@ -202,26 +202,26 @@ body: |
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %35(s1), %bb.3
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %34(s1), %bb.3
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.3, [[C]](s32), %bb.0
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.3
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
@@ -259,8 +259,7 @@ body: |
; GFX10-NEXT: G_BR %bb.4
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.4:
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
@@ -272,30 +271,30 @@ body: |
%3:_(s32) = COPY $vgpr2
%4:_(s32) = COPY $vgpr3
%5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32)
- %6:_(s32) = G_CONSTANT i32 0
- %7:_(s32) = G_IMPLICIT_DEF
+ %6:_(s32) = G_IMPLICIT_DEF
+ %7:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
- %8:_(s32) = G_PHI %9(s32), %bb.3, %6(s32), %bb.0
- %10:_(s32) = G_PHI %6(s32), %bb.0, %11(s32), %bb.3
- %12:_(s1) = G_CONSTANT i1 true
- %13:_(s64) = G_SEXT %10(s32)
- %14:_(s32) = G_CONSTANT i32 2
- %15:_(s64) = G_SHL %13, %14(s32)
- %16:_(p1) = G_PTR_ADD %5, %15(s64)
- %17:_(s32) = G_LOAD %16(p1) :: (load (s32), addrspace 1)
- %18:_(s32) = G_CONSTANT i32 0
- %19:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %17(s32), %18
- %20:sreg_32_xm0_xexec(s32) = SI_IF %19(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ %8:_(s32) = G_PHI %9(s32), %bb.3, %7(s32), %bb.0
+ %10:_(s32) = G_PHI %7(s32), %bb.0, %11(s32), %bb.3
+ %12:_(s64) = G_SEXT %10(s32)
+ %13:_(s32) = G_CONSTANT i32 2
+ %14:_(s64) = G_SHL %12, %13(s32)
+ %15:_(p1) = G_PTR_ADD %5, %14(s64)
+ %16:_(s32) = G_LOAD %15(p1) :: (load (s32), addrspace 1)
+ %17:_(s32) = G_CONSTANT i32 0
+ %18:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %16(s32), %17
+ %19:_(s1) = G_CONSTANT i1 true
+ %20:sreg_32_xm0_xexec(s32) = SI_IF %18(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
bb.2:
successors: %bb.3(0x80000000)
%21:_(s32) = G_CONSTANT i32 2
- %22:_(s64) = G_SHL %13, %21(s32)
+ %22:_(s64) = G_SHL %12, %21(s32)
%23:_(p1) = G_PTR_ADD %2, %22(s64)
%24:_(s32) = G_LOAD %23(p1) :: (load (s32), addrspace 1)
%25:_(s32) = G_CONSTANT i32 1
@@ -308,16 +307,15 @@ body: |
bb.3:
successors: %bb.4(0x04000000), %bb.1(0x7c000000)
- %11:_(s32) = G_PHI %27(s32), %bb.2, %7(s32), %bb.1
- %30:_(s1) = G_PHI %29(s1), %bb.2, %12(s1), %bb.1
+ %11:_(s32) = G_PHI %27(s32), %bb.2, %6(s32), %bb.1
+ %30:_(s1) = G_PHI %29(s1), %bb.2, %19(s1), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %20(s32)
%9:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %30(s1), %8(s32)
SI_LOOP %9(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
bb.4:
- %31:_(s32) = G_PHI %9(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %31(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(s32)
S_ENDPGM 0
...
@@ -340,26 +338,26 @@ body: |
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %48(s1), %bb.3
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %47(s1), %bb.3
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.3, [[C]](s32), %bb.0
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.3
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
@@ -370,14 +368,14 @@ body: |
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32)
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32)
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C6]]
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C5]]
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1)
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1)
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.4
@@ -385,7 +383,7 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %47(s1), %bb.5
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %46(s1), %bb.5
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %32(s32), %bb.5, [[DEF]](s32), %bb.1
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
@@ -425,8 +423,7 @@ body: |
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
@@ -441,43 +438,43 @@ body: |
%6:_(s32) = COPY $vgpr4
%7:_(s32) = COPY $vgpr5
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
- %9:_(s32) = G_CONSTANT i32 0
- %10:_(s32) = G_IMPLICIT_DEF
+ %9:_(s32) = G_IMPLICIT_DEF
+ %10:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
- %11:_(s32) = G_PHI %12(s32), %bb.3, %9(s32), %bb.0
- %13:_(s32) = G_PHI %9(s32), %bb.0, %14(s32), %bb.3
- %15:_(s1) = G_CONSTANT i1 true
- %16:_(s64) = G_SEXT %13(s32)
- %17:_(s32) = G_CONSTANT i32 2
- %18:_(s64) = G_SHL %16, %17(s32)
- %19:_(p1) = G_PTR_ADD %5, %18(s64)
- %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1)
- %21:_(s32) = G_CONSTANT i32 0
- %22:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %20(s32), %21
- %23:sreg_32_xm0_xexec(s32) = SI_IF %22(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ %11:_(s32) = G_PHI %12(s32), %bb.3, %10(s32), %bb.0
+ %13:_(s32) = G_PHI %10(s32), %bb.0, %14(s32), %bb.3
+ %15:_(s64) = G_SEXT %13(s32)
+ %16:_(s32) = G_CONSTANT i32 2
+ %17:_(s64) = G_SHL %15, %16(s32)
+ %18:_(p1) = G_PTR_ADD %5, %17(s64)
+ %19:_(s32) = G_LOAD %18(p1) :: (load (s32), addrspace 1)
+ %20:_(s32) = G_CONSTANT i32 0
+ %21:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %19(s32), %20
+ %22:_(s1) = G_CONSTANT i1 true
+ %23:sreg_32_xm0_xexec(s32) = SI_IF %21(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
bb.2:
successors: %bb.4(0x40000000), %bb.5(0x40000000)
- %24:_(s1) = G_CONSTANT i1 true
- %25:_(s32) = G_CONSTANT i32 2
- %26:_(s64) = G_SHL %16, %25(s32)
- %27:_(p1) = G_PTR_ADD %8, %26(s64)
- %28:_(s32) = G_LOAD %27(p1) :: (load (s32), addrspace 1)
- %29:_(s32) = G_CONSTANT i32 0
- %30:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %28(s32), %29
- %31:sreg_32_xm0_xexec(s32) = SI_IF %30(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
+ %24:_(s32) = G_CONSTANT i32 2
+ %25:_(s64) = G_SHL %15, %24(s32)
+ %26:_(p1) = G_PTR_ADD %8, %25(s64)
+ %27:_(s32) = G_LOAD %26(p1) :: (load (s32), addrspace 1)
+ %28:_(s32) = G_CONSTANT i32 0
+ %29:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %27(s32), %28
+ %30:_(s1) = G_CONSTANT i1 true
+ %31:sreg_32_xm0_xexec(s32) = SI_IF %29(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
bb.3:
successors: %bb.6(0x04000000), %bb.1(0x7c000000)
- %14:_(s32) = G_PHI %32(s32), %bb.5, %10(s32), %bb.1
- %33:_(s1) = G_PHI %34(s1), %bb.5, %15(s1), %bb.1
+ %14:_(s32) = G_PHI %32(s32), %bb.5, %9(s32), %bb.1
+ %33:_(s1) = G_PHI %34(s1), %bb.5, %22(s1), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(s32)
%12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %33(s1), %11(s32)
SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -487,7 +484,7 @@ body: |
successors: %bb.5(0x80000000)
%35:_(s32) = G_CONSTANT i32 2
- %36:_(s64) = G_SHL %16, %35(s32)
+ %36:_(s64) = G_SHL %15, %35(s32)
%37:_(p1) = G_PTR_ADD %2, %36(s64)
%38:_(s32) = G_LOAD %37(p1) :: (load (s32), addrspace 1)
%39:_(s32) = G_CONSTANT i32 1
@@ -500,14 +497,13 @@ body: |
bb.5:
successors: %bb.3(0x80000000)
- %32:_(s32) = G_PHI %41(s32), %bb.4, %10(s32), %bb.2
- %34:_(s1) = G_PHI %43(s1), %bb.4, %24(s1), %bb.2
+ %32:_(s32) = G_PHI %41(s32), %bb.4, %9(s32), %bb.2
+ %34:_(s1) = G_PHI %43(s1), %bb.4, %30(s1), %bb.2
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %31(s32)
G_BR %bb.3
bb.6:
- %44:_(s32) = G_PHI %12(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %44(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %12(s32)
S_ENDPGM 0
...
@@ -533,26 +529,26 @@ body: |
; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10-NEXT: [[MV3:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %61(s1), %bb.3
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %60(s1), %bb.3
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
@@ -563,14 +559,14 @@ body: |
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32)
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32)
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C6]]
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C5]]
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1)
; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[COPY11]](s1)
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.4
@@ -578,7 +574,7 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.8(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %60(s1), %bb.5
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %59(s1), %bb.5
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %35(s32), %bb.5, [[DEF]](s32), %bb.1
; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
@@ -589,14 +585,14 @@ body: |
; GFX10-NEXT: bb.4:
; GFX10-NEXT: successors: %bb.6(0x40000000), %bb.7(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C8]](s32)
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C7]](s32)
; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV3]], [[SHL2]](s64)
; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD2]](s32), [[C9]]
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1)
+ ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD2]](s32), [[C8]]
+ ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C9]](s1)
; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1)
; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP2]](s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
@@ -604,7 +600,7 @@ body: |
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.3(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY11]](s1), %bb.2, %72(s1), %bb.7
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY11]](s1), %bb.2, %71(s1), %bb.7
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI %46(s32), %bb.7, [[DEF]](s32), %bb.2
; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[COPY16]](s1)
@@ -646,8 +642,7 @@ body: |
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.8:
- ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
@@ -665,43 +660,43 @@ body: |
%9:_(s32) = COPY $vgpr6
%10:_(s32) = COPY $vgpr7
%11:_(p1) = G_MERGE_VALUES %9(s32), %10(s32)
- %12:_(s32) = G_CONSTANT i32 0
- %13:_(s32) = G_IMPLICIT_DEF
+ %12:_(s32) = G_IMPLICIT_DEF
+ %13:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
- %14:_(s32) = G_PHI %15(s32), %bb.3, %12(s32), %bb.0
- %16:_(s32) = G_PHI %12(s32), %bb.0, %17(s32), %bb.3
- %18:_(s1) = G_CONSTANT i1 true
- %19:_(s64) = G_SEXT %16(s32)
- %20:_(s32) = G_CONSTANT i32 2
- %21:_(s64) = G_SHL %19, %20(s32)
- %22:_(p1) = G_PTR_ADD %5, %21(s64)
- %23:_(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1)
- %24:_(s32) = G_CONSTANT i32 0
- %25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24
- %26:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ %14:_(s32) = G_PHI %15(s32), %bb.3, %13(s32), %bb.0
+ %16:_(s32) = G_PHI %13(s32), %bb.0, %17(s32), %bb.3
+ %18:_(s64) = G_SEXT %16(s32)
+ %19:_(s32) = G_CONSTANT i32 2
+ %20:_(s64) = G_SHL %18, %19(s32)
+ %21:_(p1) = G_PTR_ADD %5, %20(s64)
+ %22:_(s32) = G_LOAD %21(p1) :: (load (s32), addrspace 1)
+ %23:_(s32) = G_CONSTANT i32 0
+ %24:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %22(s32), %23
+ %25:_(s1) = G_CONSTANT i1 true
+ %26:sreg_32_xm0_xexec(s32) = SI_IF %24(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
bb.2:
successors: %bb.4(0x40000000), %bb.5(0x40000000)
- %27:_(s1) = G_CONSTANT i1 true
- %28:_(s32) = G_CONSTANT i32 2
- %29:_(s64) = G_SHL %19, %28(s32)
- %30:_(p1) = G_PTR_ADD %8, %29(s64)
- %31:_(s32) = G_LOAD %30(p1) :: (load (s32), addrspace 1)
- %32:_(s32) = G_CONSTANT i32 0
- %33:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %31(s32), %32
- %34:sreg_32_xm0_xexec(s32) = SI_IF %33(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
+ %27:_(s32) = G_CONSTANT i32 2
+ %28:_(s64) = G_SHL %18, %27(s32)
+ %29:_(p1) = G_PTR_ADD %8, %28(s64)
+ %30:_(s32) = G_LOAD %29(p1) :: (load (s32), addrspace 1)
+ %31:_(s32) = G_CONSTANT i32 0
+ %32:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %30(s32), %31
+ %33:_(s1) = G_CONSTANT i1 true
+ %34:sreg_32_xm0_xexec(s32) = SI_IF %32(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
bb.3:
successors: %bb.8(0x04000000), %bb.1(0x7c000000)
- %17:_(s32) = G_PHI %35(s32), %bb.5, %13(s32), %bb.1
- %36:_(s1) = G_PHI %37(s1), %bb.5, %18(s1), %bb.1
+ %17:_(s32) = G_PHI %35(s32), %bb.5, %12(s32), %bb.1
+ %36:_(s1) = G_PHI %37(s1), %bb.5, %25(s1), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %26(s32)
%15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %36(s1), %14(s32)
SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -710,21 +705,21 @@ body: |
bb.4:
successors: %bb.6(0x40000000), %bb.7(0x40000000)
- %38:_(s1) = G_CONSTANT i1 true
- %39:_(s32) = G_CONSTANT i32 2
- %40:_(s64) = G_SHL %19, %39(s32)
- %41:_(p1) = G_PTR_ADD %11, %40(s64)
- %42:_(s32) = G_LOAD %41(p1) :: (load (s32), addrspace 1)
- %43:_(s32) = G_CONSTANT i32 0
- %44:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %42(s32), %43
- %45:sreg_32_xm0_xexec(s32) = SI_IF %44(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
+ %38:_(s32) = G_CONSTANT i32 2
+ %39:_(s64) = G_SHL %18, %38(s32)
+ %40:_(p1) = G_PTR_ADD %11, %39(s64)
+ %41:_(s32) = G_LOAD %40(p1) :: (load (s32), addrspace 1)
+ %42:_(s32) = G_CONSTANT i32 0
+ %43:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %41(s32), %42
+ %44:_(s1) = G_CONSTANT i1 true
+ %45:sreg_32_xm0_xexec(s32) = SI_IF %43(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.6
bb.5:
successors: %bb.3(0x80000000)
- %35:_(s32) = G_PHI %46(s32), %bb.7, %13(s32), %bb.2
- %37:_(s1) = G_PHI %47(s1), %bb.7, %27(s1), %bb.2
+ %35:_(s32) = G_PHI %46(s32), %bb.7, %12(s32), %bb.2
+ %37:_(s1) = G_PHI %47(s1), %bb.7, %33(s1), %bb.2
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(s32)
G_BR %bb.3
@@ -732,7 +727,7 @@ body: |
successors: %bb.7(0x80000000)
%48:_(s32) = G_CONSTANT i32 2
- %49:_(s64) = G_SHL %19, %48(s32)
+ %49:_(s64) = G_SHL %18, %48(s32)
%50:_(p1) = G_PTR_ADD %2, %49(s64)
%51:_(s32) = G_LOAD %50(p1) :: (load (s32), addrspace 1)
%52:_(s32) = G_CONSTANT i32 1
@@ -745,14 +740,13 @@ body: |
bb.7:
successors: %bb.5(0x80000000)
- %46:_(s32) = G_PHI %54(s32), %bb.6, %13(s32), %bb.4
- %47:_(s1) = G_PHI %56(s1), %bb.6, %38(s1), %bb.4
+ %46:_(s32) = G_PHI %54(s32), %bb.6, %12(s32), %bb.4
+ %47:_(s1) = G_PHI %56(s1), %bb.6, %44(s1), %bb.4
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %45(s32)
G_BR %bb.5
bb.8:
- %57:_(s32) = G_PHI %15(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %57(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
S_ENDPGM 0
...
@@ -775,41 +769,38 @@ body: |
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF3]](s1), %bb.0, %67(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %56(s1), %bb.5
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %43(s1), %bb.5
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %54(s1), %bb.5
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %41(s1), %bb.5
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
@@ -823,24 +814,24 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.5(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32)
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]]
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C6]]
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
- ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C7]]
- ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C8]]
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C6]]
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[C7]]
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
+ ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C8]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
@@ -851,27 +842,21 @@ body: |
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
- ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1)
- ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY16]](s1)
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1)
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY14]](s1), [[PHI2]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5
- ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_4]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32)
- ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY15]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
bb.0:
successors: %bb.1(0x80000000)
@@ -886,23 +871,23 @@ body: |
%6:_(s32) = COPY $vgpr4
%7:_(s32) = COPY $vgpr5
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
- %9:_(s32) = G_CONSTANT i32 0
- %10:_(s32) = G_IMPLICIT_DEF
+ %9:_(s32) = G_IMPLICIT_DEF
+ %10:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.3(0x40000000), %bb.5(0x40000000)
- %11:_(s32) = G_PHI %12(s32), %bb.5, %9(s32), %bb.0
- %13:_(s32) = G_PHI %9(s32), %bb.0, %14(s32), %bb.5
- %15:_(s1) = G_CONSTANT i1 true
- %16:_(s64) = G_SEXT %13(s32)
- %17:_(s32) = G_CONSTANT i32 2
- %18:_(s64) = G_SHL %16, %17(s32)
- %19:_(p1) = G_PTR_ADD %5, %18(s64)
- %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1)
- %21:_(s32) = G_CONSTANT i32 0
- %22:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %20(s32), %21
- %23:sreg_32_xm0_xexec(s32) = SI_IF %22(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
+ %11:_(s32) = G_PHI %12(s32), %bb.5, %10(s32), %bb.0
+ %13:_(s32) = G_PHI %10(s32), %bb.0, %14(s32), %bb.5
+ %15:_(s64) = G_SEXT %13(s32)
+ %16:_(s32) = G_CONSTANT i32 2
+ %17:_(s64) = G_SHL %15, %16(s32)
+ %18:_(p1) = G_PTR_ADD %5, %17(s64)
+ %19:_(s32) = G_LOAD %18(p1) :: (load (s32), addrspace 1)
+ %20:_(s32) = G_CONSTANT i32 0
+ %21:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %19(s32), %20
+ %22:_(s1) = G_CONSTANT i1 true
+ %23:sreg_32_xm0_xexec(s32) = SI_IF %21(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.3
bb.2:
@@ -915,17 +900,17 @@ body: |
bb.3:
successors: %bb.5(0x80000000)
- %25:_(s1) = G_CONSTANT i1 false
- %26:_(s32) = G_CONSTANT i32 2
- %27:_(s64) = G_SHL %16, %26(s32)
- %28:_(p1) = G_PTR_ADD %2, %27(s64)
- %29:_(s32) = G_LOAD %28(p1) :: (load (s32), addrspace 1)
- %30:_(s32) = G_CONSTANT i32 1
- %31:_(s32) = G_ADD %29, %30
- G_STORE %31(s32), %28(p1) :: (store (s32), addrspace 1)
- %32:_(s32) = G_ADD %13, %30
- %33:_(s32) = G_CONSTANT i32 100
- %34:_(s1) = G_ICMP intpred(ult), %13(s32), %33
+ %25:_(s32) = G_CONSTANT i32 2
+ %26:_(s64) = G_SHL %15, %25(s32)
+ %27:_(p1) = G_PTR_ADD %2, %26(s64)
+ %28:_(s32) = G_LOAD %27(p1) :: (load (s32), addrspace 1)
+ %29:_(s32) = G_CONSTANT i32 1
+ %30:_(s32) = G_ADD %28, %29
+ G_STORE %30(s32), %27(p1) :: (store (s32), addrspace 1)
+ %31:_(s32) = G_ADD %13, %29
+ %32:_(s32) = G_CONSTANT i32 100
+ %33:_(s1) = G_ICMP intpred(ult), %13(s32), %32
+ %34:_(s1) = G_CONSTANT i1 false
G_BR %bb.5
bb.4:
@@ -935,9 +920,9 @@ body: |
bb.5:
successors: %bb.6(0x04000000), %bb.1(0x7c000000)
- %14:_(s32) = G_PHI %32(s32), %bb.3, %10(s32), %bb.1
- %36:_(s1) = G_PHI %25(s1), %bb.3, %15(s1), %bb.1
- %37:_(s1) = G_PHI %34(s1), %bb.3, %15(s1), %bb.1
+ %14:_(s32) = G_PHI %31(s32), %bb.3, %9(s32), %bb.1
+ %36:sreg_32_xm0_xexec(s1) = G_PHI %34(s1), %bb.3, %22(s1), %bb.1
+ %37:_(s1) = G_PHI %33(s1), %bb.3, %22(s1), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(s32)
%12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %37(s1), %11(s32)
SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -946,10 +931,8 @@ body: |
bb.6:
successors: %bb.2(0x40000000), %bb.4(0x40000000)
- %38:sreg_32_xm0_xexec(s1) = G_PHI %36(s1), %bb.5
- %39:_(s32) = G_PHI %12(s32), %bb.5
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %39(s32)
- %35:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %12(s32)
+ %35:sreg_32_xm0_xexec(s32) = SI_IF %36(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
...
@@ -960,6 +943,223 @@ tracksRegLiveness: true
body: |
; GFX10-LABEL: name: irreducible_cfg
; GFX10: bb.0:
+ ; GFX10-NEXT: successors: %bb.8(0x80000000)
+ ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY4]](s32), [[COPY1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C]]
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1)
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: G_BR %bb.8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.1:
+ ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %51(s1), %bb.5, %50(s1), %bb.8
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %39(s1), %bb.5, %38(s1), %bb.8
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1)
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[COPY10]](s1)
+ ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY8]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_BR %bb.2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY4]](s32), [[COPY]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: G_BR %bb.4
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.3:
+ ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.8(0x7c000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[COPY10]](s1), %bb.1, %58(s1), %bb.7
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %23(s1), %bb.7, [[DEF]](s1), %bb.1
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY12]](s1), %27(s32)
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1)
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
+ ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.8, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_BR %bb.6
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.4:
+ ; GFX10-NEXT: successors: %bb.7(0x04000000), %bb.4(0x7c000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C3]](s32), %bb.2, %29(s32), %bb.4
+ ; GFX10-NEXT: [[INT1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI4]](s32)
+ ; GFX10-NEXT: SI_LOOP [[INT1]](s32), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_BR %bb.7
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.5:
+ ; GFX10-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY5]](s32), [[COPY]]
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
+ ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[ICMP]]
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[FREEZE]], [[C4]]
+ ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR1]]
+ ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[OR]], [[C4]]
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[XOR2]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %46(s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 %52(s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
+ ; GFX10-NEXT: G_BR %bb.1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.6:
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY9]](s1), [[COPY3]], [[COPY2]]
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](s32)
+ ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.7:
+ ; GFX10-NEXT: successors: %bb.3(0x80000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s32)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: G_BR %bb.3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.8:
+ ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY7]](s1), %bb.0, [[COPY14]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.0, [[COPY13]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI1]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1)
+ ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[COPY19]](s1)
+ ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1)
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY21]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY20]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1)
+ ; GFX10-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1)
+ ; GFX10-NEXT: [[COPY24:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY23]](s1)
+ ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_BR %bb.5
+ bb.0:
+ successors: %bb.8(0x80000000)
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = COPY $vgpr3
+ %4:_(s32) = COPY $vgpr4
+ %5:_(s32) = COPY $vgpr5
+ %6:_(s1) = G_IMPLICIT_DEF
+ %7:_(s1) = G_ICMP intpred(sgt), %4(s32), %1
+ %8:_(s1) = G_CONSTANT i1 true
+ %9:_(s1) = G_XOR %7, %8
+ %10:_(s32) = G_CONSTANT i32 0
+ G_BR %bb.8
+
+ bb.1:
+ successors: %bb.2(0x40000000), %bb.3(0x40000000)
+
+ %11:_(s1) = G_PHI %12(s1), %bb.5, %13(s1), %bb.8
+ %14:sreg_32_xm0_xexec(s1) = G_PHI %15(s1), %bb.5, %16(s1), %bb.8
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
+ %18:_(s1) = G_CONSTANT i1 true
+ %19:sreg_32_xm0_xexec(s32) = SI_IF %14(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.2
+
+ bb.2:
+ successors: %bb.4(0x80000000)
+
+ %20:_(s1) = G_ICMP intpred(sle), %4(s32), %0
+ %21:_(s32) = G_CONSTANT i32 0
+ G_BR %bb.4
+
+ bb.3:
+ successors: %bb.6(0x04000000), %bb.8(0x7c000000)
+
+ %22:_(s1) = G_PHI %23(s1), %bb.7, %6(s1), %bb.1
+ %24:_(s1) = G_PHI %25(s1), %bb.7, %18(s1), %bb.1
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32)
+ %26:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %27(s32)
+ SI_LOOP %26(s32), %bb.8, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.6
+
+ bb.4:
+ successors: %bb.7(0x04000000), %bb.4(0x7c000000)
+
+ %28:_(s32) = G_PHI %21(s32), %bb.2, %29(s32), %bb.4
+ %29:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %20(s1), %28(s32)
+ SI_LOOP %29(s32), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.7
+
+ bb.5:
+ successors: %bb.1(0x80000000)
+
+ %12:_(s1) = G_ICMP intpred(sgt), %5(s32), %0
+ %30:_(s1) = G_FREEZE %7
+ %31:_(s1) = G_CONSTANT i1 true
+ %32:_(s1) = G_XOR %30, %31
+ %33:_(s1) = G_OR %12, %32
+ %15:_(s1) = G_XOR %33, %31
+ G_BR %bb.1
+
+ bb.6:
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %26(s32)
+ %34:_(s32) = G_SELECT %11(s1), %3, %2
+ %35:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %34(s32)
+ $sgpr0 = COPY %35(s32)
+ SI_RETURN_TO_EPILOG implicit $sgpr0
+
+ bb.7:
+ successors: %bb.3(0x80000000)
+
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %29(s32)
+ %25:_(s1) = G_CONSTANT i1 false
+ %23:_(s1) = G_CONSTANT i1 true
+ G_BR %bb.3
+
+ bb.8:
+ successors: %bb.5(0x40000000), %bb.1(0x40000000)
+
+ %27:_(s32) = G_PHI %26(s32), %bb.3, %10(s32), %bb.0
+ %13:_(s1) = G_PHI %6(s1), %bb.0, %11(s1), %bb.3
+ %36:sreg_32_xm0_xexec(s1) = G_PHI %9(s1), %bb.0, %22(s1), %bb.3
+ %16:_(s1) = G_CONSTANT i1 true
+ %17:sreg_32_xm0_xexec(s32) = SI_IF %36(s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.5
+...
+
+# Keep the original test that had a phi with three incoming values.
+# FixIrreducible no longer generates such a phi.
+---
+name: irreducible_cfg_phi_with_three_incomings
+legalized: true
+tracksRegLiveness: true
+body: |
+ ; GFX10-LABEL: name: irreducible_cfg_phi_with_three_incomings
+ ; GFX10: bb.0:
; GFX10-NEXT: successors: %bb.7(0x80000000)
; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-NEXT: {{ $}}
@@ -969,26 +1169,25 @@ body: |
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY4]](s32), [[COPY1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
; GFX10-NEXT: G_BR %bb.7
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.3(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY4]](s32), [[COPY]]
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %53(s1), %bb.6, %57(s1), %bb.7
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %35(s1), %bb.6, %34(s1), %bb.7
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %45(s1), %bb.6, %49(s1), %bb.7
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %34(s1), %bb.6, %33(s1), %bb.7
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI %12(s1), %bb.6, [[DEF]](s1), %bb.7
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1)
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
@@ -1015,27 +1214,23 @@ body: |
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY5]](s32), [[COPY]]
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
+ ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[ICMP]]
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1)
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]]
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[FREEZE]], [[C2]]
; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR]]
- ; GFX10-NEXT: [[INT2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[OR]](s1), %25(s32)
- ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %49(s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[INT2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[OR]](s1), %26(s32)
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT2]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.5:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY13]](s1), [[COPY3]], [[COPY2]]
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT2]](s32)
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[COPY3]], [[COPY2]]
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](s32)
; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
@@ -1043,34 +1238,31 @@ body: |
; GFX10-NEXT: bb.6:
; GFX10-NEXT: successors: %bb.2(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT1]](s32), %bb.3
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %42(s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
- ; GFX10-NEXT: [[DEF4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s32)
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.7:
; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY6]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_2]](s1), %bb.4
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, [[PHI7]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4
- ; GFX10-NEXT: [[PHI8:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI1]](s1), %bb.2, [[DEF3]](s1), %bb.4
- ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4, [[PHI9]](s32), %bb.2, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI10:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.4, [[INT]](s32), %bb.2, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
- ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1)
- ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI8]](s1)
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY17]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1)
- ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY15]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY6]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI1]](s1), %bb.2, [[DEF2]](s1), %bb.4
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4, [[PHI6]](s32), %bb.2, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[C3]](s32), %bb.4, [[INT]](s32), %bb.2, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI4]](s1)
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY14]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1)
+ ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY13]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.1
bb.0:
successors: %bb.7(0x80000000)
@@ -1082,22 +1274,22 @@ body: |
%3:_(s32) = COPY $vgpr3
%4:_(s32) = COPY $vgpr4
%5:_(s32) = COPY $vgpr5
- %6:_(s32) = G_CONSTANT i32 0
- %7:_(s1) = G_IMPLICIT_DEF
- %8:_(s1) = G_ICMP intpred(sgt), %4(s32), %1
+ %6:_(s1) = G_IMPLICIT_DEF
+ %7:_(s1) = G_ICMP intpred(sgt), %4(s32), %1
+ %8:_(s32) = G_CONSTANT i32 0
G_BR %bb.7
bb.1:
successors: %bb.3(0x80000000)
- %9:_(s32) = G_CONSTANT i32 0
- %10:_(s1) = G_ICMP intpred(sle), %4(s32), %0
+ %9:_(s1) = G_ICMP intpred(sle), %4(s32), %0
+ %10:_(s32) = G_CONSTANT i32 0
G_BR %bb.3
bb.2:
successors: %bb.4(0x40000000), %bb.7(0x40000000)
- %11:_(s1) = G_PHI %12(s1), %bb.6, %7(s1), %bb.7
+ %11:_(s1) = G_PHI %12(s1), %bb.6, %6(s1), %bb.7
%13:_(s1) = G_PHI %12(s1), %bb.6, %14(s1), %bb.7
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
%16:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %13(s1), %17(s32)
@@ -1107,8 +1299,8 @@ body: |
bb.3:
successors: %bb.6(0x04000000), %bb.3(0x7c000000)
- %18:_(s32) = G_PHI %9(s32), %bb.1, %19(s32), %bb.3
- %19:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %10(s1), %18(s32)
+ %18:_(s32) = G_PHI %10(s32), %bb.1, %19(s32), %bb.3
+ %19:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %9(s1), %18(s32)
SI_LOOP %19(s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.6
@@ -1117,18 +1309,18 @@ body: |
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(s32)
%20:_(s1) = G_ICMP intpred(sgt), %5(s32), %0
- %21:_(s1) = G_CONSTANT i1 true
- %22:_(s1) = G_XOR %8, %21
- %23:_(s1) = G_OR %20, %22
- %24:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %23(s1), %25(s32)
- SI_LOOP %24(s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
+ %21:_(s1) = G_FREEZE %7
+ %22:_(s1) = G_CONSTANT i1 true
+ %23:_(s1) = G_XOR %21, %22
+ %24:_(s1) = G_OR %20, %23
+ %25:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %26(s32)
+ %27:_(s32) = G_CONSTANT i32 0
+ SI_LOOP %25(s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.5
bb.5:
- %26:_(s1) = G_PHI %20(s1), %bb.4
- %27:_(s32) = G_PHI %24(s32), %bb.4
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32)
- %28:_(s32) = G_SELECT %26(s1), %3, %2
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %25(s32)
+ %28:_(s32) = G_SELECT %20(s1), %3, %2
%29:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %28(s32)
$sgpr0 = COPY %29(s32)
SI_RETURN_TO_EPILOG implicit $sgpr0
@@ -1136,18 +1328,17 @@ body: |
bb.6:
successors: %bb.2(0x80000000)
- %30:_(s32) = G_PHI %19(s32), %bb.3
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32)
%12:_(s1) = G_CONSTANT i1 false
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %30(s32)
G_BR %bb.2
bb.7:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
- %25:_(s32) = G_PHI %24(s32), %bb.4, %25(s32), %bb.2, %6(s32), %bb.0
- %17:_(s32) = G_PHI %6(s32), %bb.4, %16(s32), %bb.2, %6(s32), %bb.0
- %31:sreg_32_xm0_xexec(s1) = G_PHI %8(s1), %bb.0, %11(s1), %bb.2, %21(s1), %bb.4
+ %26:_(s32) = G_PHI %25(s32), %bb.4, %26(s32), %bb.2, %8(s32), %bb.0
+ %17:_(s32) = G_PHI %27(s32), %bb.4, %16(s32), %bb.2, %8(s32), %bb.0
+ %30:sreg_32_xm0_xexec(s1) = G_PHI %7(s1), %bb.0, %11(s1), %bb.2, %22(s1), %bb.4
%14:_(s1) = G_CONSTANT i1 true
- %15:sreg_32_xm0_xexec(s32) = SI_IF %31(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ %15:sreg_32_xm0_xexec(s32) = SI_IF %30(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.1
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
index fb43662..e9a415c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
@@ -15,39 +15,30 @@ body: |
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY3]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI2]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -58,15 +49,15 @@ body: |
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32)
- %4:_(s1) = G_CONSTANT i1 true
- %5:_(s32) = G_CONSTANT i32 0
+ %4:_(s32) = G_CONSTANT i32 0
+ %5:_(s1) = G_CONSTANT i1 true
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
- %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1
- %10:_(s1) = G_PHI %4(s1), %bb.0, %11(s1), %bb.1
+ %6:_(s32) = G_PHI %7(s32), %bb.1, %4(s32), %bb.0
+ %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1
+ %10:_(s1) = G_PHI %5(s1), %bb.0, %11(s1), %bb.1
%12:_(s1) = G_CONSTANT i1 true
%11:_(s1) = G_XOR %10, %12
%13:_(s32) = G_UITOFP %8(s32)
@@ -78,13 +69,11 @@ body: |
G_BR %bb.2
bb.2:
- %16:_(s1) = G_PHI %10(s1), %bb.1
- %17:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
- %18:_(s32) = G_FCONSTANT float 0.000000e+00
- %19:_(s32) = G_FCONSTANT float 1.000000e+00
- %20:_(s32) = G_SELECT %16(s1), %19, %18
- G_STORE %20(s32), %3(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %16:_(s32) = G_FCONSTANT float 0.000000e+00
+ %17:_(s32) = G_FCONSTANT float 1.000000e+00
+ %18:_(s32) = G_SELECT %10(s1), %17, %16
+ G_STORE %18(s32), %3(p0) :: (store (s32))
SI_RETURN
...
@@ -102,39 +91,30 @@ body: |
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -145,15 +125,15 @@ body: |
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32)
- %4:_(s1) = G_CONSTANT i1 true
- %5:_(s32) = G_CONSTANT i32 0
+ %4:_(s32) = G_CONSTANT i32 0
+ %5:_(s1) = G_CONSTANT i1 true
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
- %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1
- %10:_(s1) = G_PHI %4(s1), %bb.0, %11(s1), %bb.1
+ %6:_(s32) = G_PHI %7(s32), %bb.1, %4(s32), %bb.0
+ %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1
+ %10:_(s1) = G_PHI %5(s1), %bb.0, %11(s1), %bb.1
%12:_(s1) = G_CONSTANT i1 true
%11:_(s1) = G_XOR %10, %12
%13:_(s32) = G_UITOFP %8(s32)
@@ -165,13 +145,11 @@ body: |
G_BR %bb.2
bb.2:
- %16:_(s1) = G_PHI %11(s1), %bb.1
- %17:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
- %18:_(s32) = G_FCONSTANT float 0.000000e+00
- %19:_(s32) = G_FCONSTANT float 1.000000e+00
- %20:_(s32) = G_SELECT %16(s1), %19, %18
- G_STORE %20(s32), %3(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %16:_(s32) = G_FCONSTANT float 0.000000e+00
+ %17:_(s32) = G_FCONSTANT float 1.000000e+00
+ %18:_(s32) = G_SELECT %11(s1), %17, %16
+ G_STORE %18(s32), %3(p0) :: (store (s32))
SI_RETURN
...
@@ -195,33 +173,30 @@ body: |
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.3(0x50000000), %bb.5(0x30000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF2]](s1), %bb.0, %53(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %42(s1), %bb.5
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %13(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %15(s32), %bb.5
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %40(s1), %bb.5
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %13(s32), %bb.5, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %15(s32), %bb.5
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
@@ -235,19 +210,19 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.5(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32)
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]]
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C6]]
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
- ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C7]]
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[COPY2]]
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C6]]
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[COPY2]]
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
@@ -258,25 +233,19 @@ body: |
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.3
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s1) = G_PHI [[C5]](s1), %bb.3, [[C1]](s1), %bb.1
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY13]](s1), [[PHI2]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[C7]](s1), %bb.3, [[C3]](s1), %bb.1
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_2]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
- ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY14]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI5]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
bb.0:
successors: %bb.1(0x80000000)
@@ -292,23 +261,23 @@ body: |
%7:_(s32) = COPY $sgpr2
%8:_(s32) = COPY $sgpr3
%9:_(p1) = G_MERGE_VALUES %7(s32), %8(s32)
- %10:_(s32) = G_CONSTANT i32 0
- %11:_(s32) = G_IMPLICIT_DEF
+ %10:_(s32) = G_IMPLICIT_DEF
+ %11:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.3(0x50000000), %bb.5(0x30000000)
- %12:_(s32) = G_PHI %13(s32), %bb.5, %10(s32), %bb.0
- %14:_(s32) = G_PHI %10(s32), %bb.0, %15(s32), %bb.5
- %16:_(s1) = G_CONSTANT i1 true
- %17:_(s64) = G_SEXT %14(s32)
- %18:_(s32) = G_CONSTANT i32 2
- %19:_(s64) = G_SHL %17, %18(s32)
- %20:_(p1) = G_PTR_ADD %6, %19(s64)
- %21:_(s32) = G_LOAD %20(p1) :: (load (s32), addrspace 1)
- %22:_(s32) = G_CONSTANT i32 0
- %23:_(s1) = G_ICMP intpred(ne), %21(s32), %22
- G_BRCOND %23(s1), %bb.3
+ %12:_(s32) = G_PHI %13(s32), %bb.5, %11(s32), %bb.0
+ %14:_(s32) = G_PHI %11(s32), %bb.0, %15(s32), %bb.5
+ %16:_(s64) = G_SEXT %14(s32)
+ %17:_(s32) = G_CONSTANT i32 2
+ %18:_(s64) = G_SHL %16, %17(s32)
+ %19:_(p1) = G_PTR_ADD %6, %18(s64)
+ %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1)
+ %21:_(s32) = G_CONSTANT i32 0
+ %22:_(s1) = G_ICMP intpred(ne), %20(s32), %21
+ %23:_(s1) = G_CONSTANT i1 true
+ G_BRCOND %22(s1), %bb.3
G_BR %bb.5
bb.2:
@@ -321,16 +290,16 @@ body: |
bb.3:
successors: %bb.5(0x80000000)
- %25:_(s1) = G_CONSTANT i1 false
- %26:_(s32) = G_CONSTANT i32 2
- %27:_(s64) = G_SHL %17, %26(s32)
- %28:_(p1) = G_PTR_ADD %2, %27(s64)
- %29:_(s32) = G_LOAD %28(p1) :: (load (s32), addrspace 1)
- %30:_(s32) = G_CONSTANT i32 1
- %31:_(s32) = G_ADD %29, %30
- G_STORE %31(s32), %28(p1) :: (store (s32), addrspace 1)
- %32:_(s32) = G_ADD %14, %30
- %33:_(s1) = G_ICMP intpred(ult), %14(s32), %3
+ %25:_(s32) = G_CONSTANT i32 2
+ %26:_(s64) = G_SHL %16, %25(s32)
+ %27:_(p1) = G_PTR_ADD %2, %26(s64)
+ %28:_(s32) = G_LOAD %27(p1) :: (load (s32), addrspace 1)
+ %29:_(s32) = G_CONSTANT i32 1
+ %30:_(s32) = G_ADD %28, %29
+ G_STORE %30(s32), %27(p1) :: (store (s32), addrspace 1)
+ %31:_(s32) = G_ADD %14, %29
+ %32:_(s1) = G_ICMP intpred(ult), %14(s32), %3
+ %33:_(s1) = G_CONSTANT i1 false
G_BR %bb.5
bb.4:
@@ -340,9 +309,9 @@ body: |
bb.5:
successors: %bb.6(0x04000000), %bb.1(0x7c000000)
- %15:_(s32) = G_PHI %32(s32), %bb.3, %11(s32), %bb.1
- %35:_(s1) = G_PHI %25(s1), %bb.3, %16(s1), %bb.1
- %36:_(s1) = G_PHI %33(s1), %bb.3, %16(s1), %bb.1
+ %15:_(s32) = G_PHI %31(s32), %bb.3, %10(s32), %bb.1
+ %35:sreg_32_xm0_xexec(s1) = G_PHI %33(s1), %bb.3, %23(s1), %bb.1
+ %36:_(s1) = G_PHI %32(s1), %bb.3, %23(s1), %bb.1
%13:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %36(s1), %12(s32)
SI_LOOP %13(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.6
@@ -350,9 +319,7 @@ body: |
bb.6:
successors: %bb.2(0x40000000), %bb.4(0x40000000)
- %37:sreg_32_xm0_xexec(s1) = G_PHI %35(s1), %bb.5
- %38:_(s32) = G_PHI %13(s32), %bb.5
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %38(s32)
- %34:sreg_32_xm0_xexec(s32) = SI_IF %37(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %13(s32)
+ %34:sreg_32_xm0_xexec(s32) = SI_IF %35(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir
index d1b473f..996815e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir
@@ -15,14 +15,14 @@ body: |
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C2]]
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ADD]](s32)
@@ -32,10 +32,8 @@ body: |
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
- ; GFX10-NEXT: G_STORE [[PHI2]](s32), [[MV]](p0) :: (store (s32))
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
successors: %bb.1(0x80000000)
@@ -45,14 +43,14 @@ body: |
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32)
- %4:_(s32) = G_CONSTANT i32 0
- %5:_(s32) = G_CONSTANT i32 -1
+ %4:_(s32) = G_CONSTANT i32 -1
+ %5:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %6:_(s32) = G_PHI %7(s32), %bb.1, %4(s32), %bb.0
- %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1
+ %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
+ %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1
%10:_(s32) = G_CONSTANT i32 1
%9:_(s32) = G_ADD %8, %10
%11:_(s32) = G_UITOFP %9(s32)
@@ -62,9 +60,7 @@ body: |
G_BR %bb.2
bb.2:
- %13:_(s32) = G_PHI %9(s32), %bb.1
- %14:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32)
- G_STORE %13(s32), %3(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ G_STORE %9(s32), %3(p0) :: (store (s32))
SI_RETURN
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir
index 8f3495e..97dcd50 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir
@@ -597,7 +597,7 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %17(s32), %bb.1, [[C1]](s32), %bb.0
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %15(s32), %bb.1, [[C1]](s32), %bb.0
; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PHI1]], [[C2]]
@@ -609,11 +609,9 @@ body: |
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1
- ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
+ ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[C3]]
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[ADD]], [[C3]]
; CHECK-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -641,12 +639,10 @@ body: |
G_BR %bb.2
bb.2:
- %13:_(s32) = G_PHI %9(s32), %bb.1
- %14:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32)
- %15:_(s32) = G_CONSTANT i32 10
- %16:_(s32) = G_MUL %13, %15
- G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %13:_(s32) = G_CONSTANT i32 10
+ %14:_(s32) = G_MUL %9, %13
+ G_STORE %14(s32), %3(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
@@ -677,7 +673,7 @@ body: |
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %68(s32), %bb.3, [[C]](s32), %bb.0
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %67(s32), %bb.3, [[C]](s32), %bb.0
; CHECK-NEXT: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[PHI2]](s32)
@@ -760,8 +756,7 @@ body: |
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
- ; CHECK-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
+ ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
@@ -861,7 +856,6 @@ body: |
G_BR %bb.3
bb.6:
- %64:_(s32) = G_PHI %15(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
index 951182f..44ff2b4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
@@ -462,8 +462,7 @@ define amdgpu_ps void @divergent_because_of_temporal_divergent_use(float %val, p
; NEW_RBS-NEXT: s_cbranch_execnz .LBB15_1
; NEW_RBS-NEXT: ; %bb.2: ; %exit
; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; NEW_RBS-NEXT: v_mov_b32_e32 v0, s0
-; NEW_RBS-NEXT: v_mul_lo_u32 v0, v0, 10
+; NEW_RBS-NEXT: v_mul_lo_u32 v0, s0, 10
; NEW_RBS-NEXT: global_store_dword v[1:2], v0, off
; NEW_RBS-NEXT: s_endpgm
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir
index 1b22ee4b..06b0b72 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir
@@ -971,12 +971,10 @@ body: |
; OLD_RBS-NEXT: G_BR %bb.2
; OLD_RBS-NEXT: {{ $}}
; OLD_RBS-NEXT: bb.2:
- ; OLD_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1
- ; OLD_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1
- ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
+ ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32)
- ; OLD_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[COPY4]]
+ ; OLD_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[ADD]], [[COPY4]]
; OLD_RBS-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; OLD_RBS-NEXT: S_ENDPGM 0
;
@@ -995,7 +993,7 @@ body: |
; NEW_RBS-NEXT: bb.1:
; NEW_RBS-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; NEW_RBS-NEXT: {{ $}}
- ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %17(s32), %bb.1, [[C1]](s32), %bb.0
+ ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %15(s32), %bb.1, [[C1]](s32), %bb.0
; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PHI1]], [[C2]]
@@ -1008,12 +1006,11 @@ body: |
; NEW_RBS-NEXT: G_BR %bb.2
; NEW_RBS-NEXT: {{ $}}
; NEW_RBS-NEXT: bb.2:
- ; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1
- ; NEW_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1
- ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
+ ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
- ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32)
- ; NEW_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[COPY5]]
+ ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
+ ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32)
+ ; NEW_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[COPY5]], [[COPY6]]
; NEW_RBS-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; NEW_RBS-NEXT: S_ENDPGM 0
bb.0:
@@ -1041,12 +1038,10 @@ body: |
G_BR %bb.2
bb.2:
- %13:_(s32) = G_PHI %9(s32), %bb.1
- %14:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32)
- %15:_(s32) = G_CONSTANT i32 10
- %16:_(s32) = G_MUL %13, %15
- G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %13:_(s32) = G_CONSTANT i32 10
+ %14:_(s32) = G_MUL %9, %13
+ G_STORE %14(s32), %3(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
@@ -1167,8 +1162,7 @@ body: |
; OLD_RBS-NEXT: G_BR %bb.3
; OLD_RBS-NEXT: {{ $}}
; OLD_RBS-NEXT: bb.6:
- ; OLD_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3
- ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
+ ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; OLD_RBS-NEXT: S_ENDPGM 0
;
; NEW_RBS-LABEL: name: loop_with_2breaks
@@ -1193,7 +1187,7 @@ body: |
; NEW_RBS-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; NEW_RBS-NEXT: {{ $}}
; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3
- ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %68(s32), %bb.3, [[C]](s32), %bb.0
+ ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %67(s32), %bb.3, [[C]](s32), %bb.0
; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3
; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31
@@ -1285,8 +1279,7 @@ body: |
; NEW_RBS-NEXT: G_BR %bb.3
; NEW_RBS-NEXT: {{ $}}
; NEW_RBS-NEXT: bb.6:
- ; NEW_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3
- ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
+ ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; NEW_RBS-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
@@ -1386,7 +1379,6 @@ body: |
G_BR %bb.3
bb.6:
- %64:_(s32) = G_PHI %15(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
S_ENDPGM 0
...