about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Poseydon42 <vvmposeydon@gmail.com>, 2024-05-30 17:31:03 +0100
committer: GitHub <noreply@github.com>, 2024-05-30 18:31:03 +0200
commit: cc2fafa1788908f69366821a04407083f770483e (patch)
tree: 3ff6f91668eefd13a5ff8ca906ae4b68fa7226cb
parent: a8e03aed6ab2675b8d19f93657edc48c82e93625 (diff)
download: llvm-cc2fafa1788908f69366821a04407083f770483e.zip
download: llvm-cc2fafa1788908f69366821a04407083f770483e.tar.gz
download: llvm-cc2fafa1788908f69366821a04407083f770483e.tar.bz2
[InstSimplify] Add constant folding support for `ucmp`/`scmp` intrinsics (#93730)
This PR adds support for folding calls to `ucmp`/`scmp` intrinsics with constant arguments.
-rw-r--r-- llvm/lib/Analysis/ConstantFolding.cpp 17
-rw-r--r-- llvm/test/Transforms/InstSimplify/uscmp.ll 98
2 files changed, 115 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 5febe91..1d4272a 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1504,6 +1504,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin:
+ case Intrinsic::scmp:
+ case Intrinsic::ucmp:
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
@@ -2773,6 +2775,21 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
? *C0
: *C1);
+ case Intrinsic::scmp:
+ case Intrinsic::ucmp:
+ if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
+ return PoisonValue::get(Ty);
+
+ if (!C0 || !C1)
+ return ConstantInt::get(Ty, 0);
+
+ int Res;
+ if (IntrinsicID == Intrinsic::scmp)
+ Res = C0->sgt(*C1) ? 1 : C0->slt(*C1) ? -1 : 0;
+ else
+ Res = C0->ugt(*C1) ? 1 : C0->ult(*C1) ? -1 : 0;
+ return ConstantInt::get(Ty, Res, /*IsSigned=*/true);
+
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
// X - undef -> { 0, false }
diff --git a/llvm/test/Transforms/InstSimplify/uscmp.ll b/llvm/test/Transforms/InstSimplify/uscmp.ll
new file mode 100644
index 0000000..adfcc31
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/uscmp.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
+
+define i8 @scmp_lt() {
+; CHECK-LABEL: define i8 @scmp_lt() {
+; CHECK-NEXT: ret i8 -1
+;
+ %1 = call i8 @llvm.scmp(i32 -7, i32 3)
+ ret i8 %1
+}
+
+define i8 @scmp_eq() {
+; CHECK-LABEL: define i8 @scmp_eq() {
+; CHECK-NEXT: ret i8 0
+;
+ %1 = call i8 @llvm.scmp(i32 2, i32 2)
+ ret i8 %1
+}
+
+define i8 @scmp_gt() {
+; CHECK-LABEL: define i8 @scmp_gt() {
+; CHECK-NEXT: ret i8 1
+;
+ %1 = call i8 @llvm.scmp(i32 2, i32 -7)
+ ret i8 %1
+}
+
+define i8 @ucmp_lt() {
+; CHECK-LABEL: define i8 @ucmp_lt() {
+; CHECK-NEXT: ret i8 -1
+;
+ %1 = call i8 @llvm.ucmp(i32 7, i32 12)
+ ret i8 %1
+}
+
+define i2 @ucmp_eq() {
+; CHECK-LABEL: define i2 @ucmp_eq() {
+; CHECK-NEXT: ret i2 0
+;
+ %1 = call i2 @llvm.ucmp(i32 12, i32 12)
+ ret i2 %1
+}
+
+define i100 @ucmp_gt() {
+; CHECK-LABEL: define i100 @ucmp_gt() {
+; CHECK-NEXT: ret i100 1
+;
+ %1 = call i100 @llvm.ucmp(i32 7, i32 3)
+ ret i100 %1
+}
+
+define i8 @ucmp_poison() {
+; CHECK-LABEL: define i8 @ucmp_poison() {
+; CHECK-NEXT: ret i8 poison
+;
+ %1 = call i8 @llvm.ucmp(i32 poison, i32 5)
+ ret i8 %1
+}
+
+define i8 @scmp_poison() {
+; CHECK-LABEL: define i8 @scmp_poison() {
+; CHECK-NEXT: ret i8 poison
+;
+ %1 = call i8 @llvm.scmp(i32 0, i32 poison)
+ ret i8 %1
+}
+
+define i8 @scmp_undef() {
+; CHECK-LABEL: define i8 @scmp_undef() {
+; CHECK-NEXT: ret i8 0
+;
+ %1 = call i8 @llvm.scmp(i32 undef, i32 -12)
+ ret i8 %1
+}
+
+define i8 @ucmp_undef() {
+; CHECK-LABEL: define i8 @ucmp_undef() {
+; CHECK-NEXT: ret i8 0
+;
+ %1 = call i8 @llvm.ucmp(i32 2, i32 undef)
+ ret i8 %1
+}
+
+define <4 x i8> @ucmp_lt_splat() {
+; CHECK-LABEL: define <4 x i8> @ucmp_lt_splat() {
+; CHECK-NEXT: ret <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>
+;
+ %1 = call <4 x i8> @llvm.ucmp(<4 x i32> splat(i32 1), <4 x i32> splat(i32 3))
+ ret <4 x i8> %1
+}
+
+define <4 x i8> @scmp_nonsplat() {
+; CHECK-LABEL: define <4 x i8> @scmp_nonsplat() {
+; CHECK-NEXT: ret <4 x i8> <i8 1, i8 0, i8 1, i8 -1>
+;
+ %1 = call <4 x i8> @llvm.scmp(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 -1, i32 1, i32 -2, i32 4>)
+ ret <4 x i8> %1
+}