aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h2
-rw-r--r--llvm/include/llvm/Target/GlobalISel/Combine.td11
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp89
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/combine-cmp-merged-bfx.mir326
-rw-r--r--llvm/test/CodeGen/AMDGPU/workitem-intrinsic-opts.ll194
5 files changed, 483 insertions, 139 deletions
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index da82904..3f1764b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -645,6 +645,8 @@ public:
/// KnownBits information.
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const;
+ bool combineMergedBFXCompare(MachineInstr &MI) const;
+
/// \returns true if (and (or x, c1), c2) can be replaced with (and x, c2)
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index fc81ab7..c5f1310 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1085,6 +1085,14 @@ def double_icmp_zero_or_combine: GICombineRule<
(G_ICMP $root, $p, $ordst, 0))
>;
+// Transform ((X | (G_UBFX X, ...) | ...) == 0) (or != 0)
+// into a compare of an extract/mask of X
+def icmp_merged_bfx_combine: GICombineRule<
+ (defs root:$root),
+ (combine (G_ICMP $dst, $p, $src, 0):$root,
+ [{ return Helper.combineMergedBFXCompare(*${root}); }])
+>;
+
def and_or_disjoint_mask : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_AND):$root,
@@ -2066,7 +2074,8 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
simplify_neg_minmax, combine_concat_vector,
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
- combine_use_vector_truncate, merge_combines, overflow_combines]>;
+ combine_use_vector_truncate, merge_combines, overflow_combines,
+ icmp_merged_bfx_combine]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
index fc40533..e1d43f3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -140,3 +140,92 @@ bool CombinerHelper::matchCanonicalizeFCmp(const MachineInstr &MI,
return false;
}
+
+bool CombinerHelper::combineMergedBFXCompare(MachineInstr &MI) const {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ ICmpInst::Predicate CC = Cmp->getCond();
+ if (CC != CmpInst::ICMP_EQ && CC != CmpInst::ICMP_NE)
+ return false;
+
+ Register CmpLHS = Cmp->getLHSReg();
+ Register CmpRHS = Cmp->getRHSReg();
+
+ LLT OpTy = MRI.getType(CmpLHS);
+ if (!OpTy.isScalar() || OpTy.isPointer())
+ return false;
+
+ assert(isZeroOrZeroSplat(CmpRHS, /*AllowUndefs=*/false));
+
+ Register Src;
+ const auto IsSrc = [&](Register R) {
+ if (!Src) {
+ Src = R;
+ return true;
+ }
+
+ return Src == R;
+ };
+
+ MachineInstr *CmpLHSDef = MRI.getVRegDef(CmpLHS);
+ if (CmpLHSDef->getOpcode() != TargetOpcode::G_OR)
+ return false;
+
+ APInt PartsMask(OpTy.getSizeInBits(), 0);
+ SmallVector<MachineInstr *> Worklist = {CmpLHSDef};
+ while (!Worklist.empty()) {
+ MachineInstr *Cur = Worklist.pop_back_val();
+
+ Register Dst = Cur->getOperand(0).getReg();
+ if (!MRI.hasOneUse(Dst) && Dst != Src)
+ return false;
+
+ if (Cur->getOpcode() == TargetOpcode::G_OR) {
+ Worklist.push_back(MRI.getVRegDef(Cur->getOperand(1).getReg()));
+ Worklist.push_back(MRI.getVRegDef(Cur->getOperand(2).getReg()));
+ continue;
+ }
+
+ if (Cur->getOpcode() == TargetOpcode::G_UBFX) {
+ Register Op = Cur->getOperand(1).getReg();
+ Register Width = Cur->getOperand(2).getReg();
+ Register Off = Cur->getOperand(3).getReg();
+
+ auto WidthCst = getIConstantVRegVal(Width, MRI);
+ auto OffCst = getIConstantVRegVal(Off, MRI);
+ if (!WidthCst || !OffCst || !IsSrc(Op))
+ return false;
+
+ unsigned Start = OffCst->getZExtValue();
+ unsigned End = Start + WidthCst->getZExtValue();
+ if (End > OpTy.getScalarSizeInBits())
+ return false;
+ PartsMask.setBits(Start, End);
+ continue;
+ }
+
+ if (Cur->getOpcode() == TargetOpcode::G_AND) {
+ Register LHS = Cur->getOperand(1).getReg();
+ Register RHS = Cur->getOperand(2).getReg();
+
+ auto MaskCst = getIConstantVRegVal(RHS, MRI);
+ if (!MaskCst || !MaskCst->isMask() || !IsSrc(LHS))
+ return false;
+
+ PartsMask |= *MaskCst;
+ continue;
+ }
+
+ return false;
+ }
+
+ if (!PartsMask.isMask() || !Src)
+ return false;
+
+ assert(OpTy == MRI.getType(Src) && "Ignored a type casting operation?");
+ auto MaskedSrc =
+ Builder.buildAnd(OpTy, Src, Builder.buildConstant(OpTy, PartsMask));
+ Builder.buildICmp(CC, Cmp->getReg(0), MaskedSrc, CmpRHS, Cmp->getFlags());
+ MI.eraseFromParent();
+ return true;
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-cmp-merged-bfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-cmp-merged-bfx.mir
new file mode 100644
index 0000000..b96a677
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-cmp-merged-bfx.mir
@@ -0,0 +1,326 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=amdgpu-postlegalizer-combiner %s -o - | FileCheck %s
+
+---
+name: basic_i64_2x5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: basic_i64_2x5
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1023
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND %reg, [[C]]
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), [[AND]](s64), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s64) = COPY $vgpr0_vgpr1
+ %mask:_(s64) = G_CONSTANT i64 31
+ %reg_mask:_(s64) = G_AND %reg, %mask
+ %k:_(s64) = G_CONSTANT i64 5
+ %bfx:_(s64) = G_UBFX %reg, %k, %k
+ %x:_(s64) = G_OR %reg_mask, %bfx
+ %zero:_(s64) = G_CONSTANT i64 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: basic_i32_2x5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: basic_i32_2x5
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %reg, [[C]]
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), [[AND]](s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg, %mask
+ %k:_(s32) = G_CONSTANT i32 5
+ %bfx:_(s32) = G_UBFX %reg, %k, %k
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: basic_ne_i32_2x5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: basic_ne_i32_2x5
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %reg, [[C]]
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(ne), [[AND]](s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg, %mask
+ %k:_(s32) = G_CONSTANT i32 5
+ %bfx:_(s32) = G_UBFX %reg, %k, %k
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(ne), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: basic_i32_5x5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: basic_i32_5x5
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %reg, [[C]]
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), [[AND]](s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg, %mask
+ %five:_(s32) = G_CONSTANT i32 5
+ %bfx1:_(s32) = G_UBFX %reg, %five, %five
+ %x1:_(s32) = G_OR %reg_mask, %bfx1
+ %k2:_(s32) = G_CONSTANT i32 10
+ %bfx2:_(s32) = G_UBFX %reg, %k2, %five
+ %x2:_(s32) = G_OR %x1, %bfx2
+ %k3:_(s32) = G_CONSTANT i32 15
+ %bfx3:_(s32) = G_UBFX %reg, %k3, %five
+ %x3:_(s32) = G_OR %x2, %bfx3
+ %k4:_(s32) = G_CONSTANT i32 20
+ %bfx4:_(s32) = G_UBFX %reg, %k4, %five
+ %x4:_(s32) = G_OR %x3, %bfx4
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x4, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: basic_i16_2x5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: basic_i16_2x5
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %reg_trunc:_(s16) = G_TRUNC %reg(s32)
+ ; CHECK-NEXT: %zero:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1023
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND %reg_trunc, [[C]]
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), [[AND]](s16), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %reg_trunc:_(s16) = G_TRUNC %reg
+ %mask:_(s16) = G_CONSTANT i16 31
+ %reg_mask:_(s16) = G_AND %reg_trunc, %mask
+ %k:_(s16) = G_CONSTANT i16 5
+ %bfx:_(s16) = G_UBFX %reg_trunc, %k, %k
+ %x:_(s16) = G_OR %reg_mask, %bfx
+ %zero:_(s16) = G_CONSTANT i16 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: unsupported_sbfx
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: unsupported_sbfx
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg, %mask
+ ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: %bfx:_(s32) = G_SBFX %reg, %k(s32), %k
+ ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg, %mask
+ %k:_(s32) = G_CONSTANT i32 5
+ %bfx:_(s32) = G_SBFX %reg, %k, %k
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: unsupported_src_changes
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: unsupported_src_changes
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %reg2:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg2, %mask
+ ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: %bfx:_(s32) = G_SBFX %reg, %k(s32), %k
+ ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %reg2:_(s32) = COPY $vgpr1
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg2, %mask
+ %k:_(s32) = G_CONSTANT i32 5
+ %bfx:_(s32) = G_SBFX %reg, %k, %k
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: unsupported_holes_in_mask
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: unsupported_holes_in_mask
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %reg2:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg2, %mask
+ ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 6
+ ; CHECK-NEXT: %bfx:_(s32) = G_UBFX %reg, %k(s32), %k
+ ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %reg2:_(s32) = COPY $vgpr1
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg2, %mask
+ %k:_(s32) = G_CONSTANT i32 6
+ %bfx:_(s32) = G_UBFX %reg, %k, %k
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: unsupported_bfx_out_of_range
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: unsupported_bfx_out_of_range
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %reg2:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg2, %mask
+ ; CHECK-NEXT: %width:_(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: %off:_(s32) = G_CONSTANT i32 26
+ ; CHECK-NEXT: %bfx:_(s32) = G_UBFX %reg, %off(s32), %width
+ ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %reg2:_(s32) = COPY $vgpr1
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg2, %mask
+ %width:_(s32) = G_CONSTANT i32 12
+ %off:_(s32) = G_CONSTANT i32 26
+ %bfx:_(s32) = G_UBFX %reg, %off, %width
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
+
+---
+name: unsupported_cc
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: unsupported_cc
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: %reg_mask:_(s32) = G_AND %reg, %mask
+ ; CHECK-NEXT: %k:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: %bfx:_(s32) = G_UBFX %reg, %k(s32), %k
+ ; CHECK-NEXT: %x:_(s32) = G_OR %reg_mask, %bfx
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(ule), %x(s32), %zero
+ ; CHECK-NEXT: %res:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK-NEXT: $vgpr0 = COPY %res(s32)
+ %reg:_(s32) = COPY $vgpr0
+ %mask:_(s32) = G_CONSTANT i32 31
+ %reg_mask:_(s32) = G_AND %reg, %mask
+ %k:_(s32) = G_CONSTANT i32 5
+ %bfx:_(s32) = G_UBFX %reg, %k, %k
+ %x:_(s32) = G_OR %reg_mask, %bfx
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(ule), %x, %zero
+ %res:_(s32) = G_ZEXT %cmp
+ $vgpr0 = COPY %res
+...
diff --git a/llvm/test/CodeGen/AMDGPU/workitem-intrinsic-opts.ll b/llvm/test/CodeGen/AMDGPU/workitem-intrinsic-opts.ll
index 64d055b..487504b 100644
--- a/llvm/test/CodeGen/AMDGPU/workitem-intrinsic-opts.ll
+++ b/llvm/test/CodeGen/AMDGPU/workitem-intrinsic-opts.ll
@@ -9,77 +9,36 @@
; (workitem_id_x | workitem_id_y | workitem_id_z) == 0
define i1 @workitem_zero() {
-; DAGISEL-GFX8-LABEL: workitem_zero:
-; DAGISEL-GFX8: ; %bb.0: ; %entry
-; DAGISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; DAGISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
-; DAGISEL-GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; DAGISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; DAGISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; DAGISEL-GFX942-LABEL: workitem_zero:
-; DAGISEL-GFX942: ; %bb.0: ; %entry
-; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; DAGISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
-; DAGISEL-GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; DAGISEL-GFX942-NEXT: s_nop 1
-; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
-;
-; DAGISEL-GFX12-LABEL: workitem_zero:
-; DAGISEL-GFX12: ; %bb.0: ; %entry
-; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
-; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
-; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
-; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
-; DAGISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
-; DAGISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; DAGISEL-GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffd
-; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
-;
-; GISEL-GFX8-LABEL: workitem_zero:
-; GISEL-GFX8: ; %bb.0: ; %entry
-; GISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3ff, v31
-; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 10, 10
-; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
-; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 20, 10
-; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
-; GISEL-GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX8-LABEL: workitem_zero:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
+; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX942-LABEL: workitem_zero:
-; GISEL-GFX942: ; %bb.0: ; %entry
-; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v31
-; GISEL-GFX942-NEXT: v_bfe_u32 v1, v31, 10, 10
-; GISEL-GFX942-NEXT: v_bfe_u32 v2, v31, 20, 10
-; GISEL-GFX942-NEXT: v_or3_b32 v0, v0, v1, v2
-; GISEL-GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GISEL-GFX942-NEXT: s_nop 1
-; GISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: workitem_zero:
+; GFX942: ; %bb.0: ; %entry
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
+; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX12-LABEL: workitem_zero:
-; GISEL-GFX12: ; %bb.0: ; %entry
-; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
-; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
-; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
-; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v31
-; GISEL-GFX12-NEXT: v_bfe_u32 v1, v31, 10, 10
-; GISEL-GFX12-NEXT: v_bfe_u32 v2, v31, 20, 10
-; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX12-NEXT: v_or3_b32 v0, v0, v1, v2
-; GISEL-GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GISEL-GFX12-NEXT: s_wait_alu 0xfffd
-; GISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-LABEL: workitem_zero:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%1 = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -92,77 +51,36 @@ entry:
; (workitem_id_x | workitem_id_y | workitem_id_z) != 0
define i1 @workitem_nonzero() {
-; DAGISEL-GFX8-LABEL: workitem_nonzero:
-; DAGISEL-GFX8: ; %bb.0: ; %entry
-; DAGISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; DAGISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
-; DAGISEL-GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; DAGISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; DAGISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; DAGISEL-GFX942-LABEL: workitem_nonzero:
-; DAGISEL-GFX942: ; %bb.0: ; %entry
-; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; DAGISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
-; DAGISEL-GFX942-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; DAGISEL-GFX942-NEXT: s_nop 1
-; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
-;
-; DAGISEL-GFX12-LABEL: workitem_nonzero:
-; DAGISEL-GFX12: ; %bb.0: ; %entry
-; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
-; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
-; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
-; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
-; DAGISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
-; DAGISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; DAGISEL-GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffd
-; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
-;
-; GISEL-GFX8-LABEL: workitem_nonzero:
-; GISEL-GFX8: ; %bb.0: ; %entry
-; GISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3ff, v31
-; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 10, 10
-; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
-; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 20, 10
-; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
-; GISEL-GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX8-LABEL: workitem_nonzero:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
+; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX942-LABEL: workitem_nonzero:
-; GISEL-GFX942: ; %bb.0: ; %entry
-; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v31
-; GISEL-GFX942-NEXT: v_bfe_u32 v1, v31, 10, 10
-; GISEL-GFX942-NEXT: v_bfe_u32 v2, v31, 20, 10
-; GISEL-GFX942-NEXT: v_or3_b32 v0, v0, v1, v2
-; GISEL-GFX942-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GISEL-GFX942-NEXT: s_nop 1
-; GISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: workitem_nonzero:
+; GFX942: ; %bb.0: ; %entry
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
+; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX942-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX12-LABEL: workitem_nonzero:
-; GISEL-GFX12: ; %bb.0: ; %entry
-; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
-; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
-; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
-; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v31
-; GISEL-GFX12-NEXT: v_bfe_u32 v1, v31, 10, 10
-; GISEL-GFX12-NEXT: v_bfe_u32 v2, v31, 20, 10
-; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX12-NEXT: v_or3_b32 v0, v0, v1, v2
-; GISEL-GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GISEL-GFX12-NEXT: s_wait_alu 0xfffd
-; GISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-LABEL: workitem_nonzero:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%1 = tail call i32 @llvm.amdgcn.workitem.id.y()