author    Peter Collingbourne <peter@pcc.me.uk>  2025-05-27 21:55:04 -0700
committer Peter Collingbourne <peter@pcc.me.uk>  2025-05-27 21:55:04 -0700
commit    b74ffea23f0cd7f96f35f467da82a302c425e9db
tree      f0d755b2f88916c2d8be848727eb94a5e9b3cd2a
parent    782a9e9f64dfa21ca21f4b5c09569b35dd041bb0
parent    852b18f51c4f3636b8850c74497e96ec22667514
branch    llvm users/pcc/spr/codegenprepare-x86-support-sinking-phi-nodes
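
CodeGenPrepare: X86: Support sinking phi nodes

tryToSinkFreeOperands previously refused to sink an operand defined by a
PHI node. It now looks through PHIs whose incoming values are all the same
value or pairwise identical instructions, using the new
PHINode::hasIdenticalValue() helper, and sinks a clone of one
representative definition instead. X86TTIImpl::isProfitableToSinkOperands
is extended the same way for shift and funnel-shift amounts, and it now
also treats a cast of a constant expression, the pattern LowerTypeTests
emits for rotate amounts derived from global addresses, as profitable to
sink, so that hoisted or CSE'd zexts of constants are rematerialized next
to their users before instruction selection.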
Created using spr 1.3.6-beta.1
 llvm/include/llvm/IR/Instructions.h                                        |   5 +
 llvm/lib/CodeGen/CodeGenPrepare.cpp                                        |  17 +-
 llvm/lib/IR/Instructions.cpp                                               |  35 +++
 llvm/lib/Target/X86/X86TargetTransformInfo.cpp                             |  35 ++-
 llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll |  92 ++++++
 llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist.ll     | 178 ++++++++++
 6 files changed, 349 insertions(+), 13 deletions(-)
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index c164f76..e822903 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -2822,6 +2822,11 @@ public:
/// non-undef value.
bool hasConstantOrUndefValue() const;
+ /// If the specified PHI node (possibly via other PHI nodes) merges together
+ /// the same or identical (i.e. Instruction::isIdenticalTo() returns true)
+ /// values, return one of the values, otherwise return null.
+ Value *hasIdenticalValue();
+
/// If the PHI node is complete which means all of its parent's predecessors
/// have incoming value in this PHI, return true, otherwise return false.
bool isComplete() const {
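A minimal usage sketch of the new API, assuming only the declaration
above; the helper name getSoleMergedInstruction is hypothetical, not part
of the patch:

  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Returns the single instruction that PN transparently merges, or null.
  static Instruction *getSoleMergedInstruction(PHINode *PN) {
    // hasIdenticalValue() may return null, and the merged value may be a
    // non-instruction such as an argument, hence dyn_cast_or_null.
    return dyn_cast_or_null<Instruction>(PN->hasIdenticalValue());
  }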
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 5226302..422916a 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7739,9 +7739,14 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
for (Use *U : reverse(OpsToSink)) {
auto *UI = cast<Instruction>(U->get());
- if (isa<PHINode>(UI))
- continue;
- if (UI->getParent() == TargetBB) {
+ if (auto *PN = dyn_cast<PHINode>(UI)) {
+ auto *I0 = dyn_cast_or_null<Instruction>(PN->hasIdenticalValue());
+ if (!I0)
+ continue;
+ if (I0->getParent() == TargetBB &&
+ InstOrdering[I0] < InstOrdering[InsertPoint])
+ InsertPoint = I0;
+ } else if (UI->getParent() == TargetBB) {
if (InstOrdering[UI] < InstOrdering[InsertPoint])
InsertPoint = UI;
continue;
@@ -7753,7 +7758,11 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
DenseMap<Instruction *, Instruction *> NewInstructions;
for (Use *U : ToReplace) {
auto *UI = cast<Instruction>(U->get());
- Instruction *NI = UI->clone();
+ Instruction *NI;
+ if (auto *PN = dyn_cast<PHINode>(UI))
+ NI = cast<Instruction>(PN->hasIdenticalValue())->clone();
+ else
+ NI = UI->clone();
if (IsHugeFunc) {
// Now we clone an instruction, its operands' defs may sink to this BB
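Both hunks treat a PHI over identical definitions as a transparent alias
for one of them. Distilled into one hypothetical helper (a sketch, not
the patch's code; the NewInstructions map and huge-function bookkeeping
shown above are elided):

  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Redirect a use chosen for sinking to a clone placed in the target
  // block; for a PHI, clone the representative identical definition.
  static void sinkUseToClone(Use *U, Instruction *InsertPoint) {
    auto *UI = cast<Instruction>(U->get());
    Instruction *Source = UI;
    if (auto *PN = dyn_cast<PHINode>(UI))
      Source = cast<Instruction>(PN->hasIdenticalValue());
    Instruction *NI = Source->clone();            // clone the def
    NI->insertBefore(InsertPoint->getIterator()); // sink into target BB
    U->set(NI);                                   // rewire the use
  }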
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index f404e11..9c59378 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -48,6 +48,7 @@
#include <cassert>
#include <cstdint>
#include <optional>
+#include <set>
#include <vector>
using namespace llvm;
@@ -239,6 +240,40 @@ bool PHINode::hasConstantOrUndefValue() const {
return true;
}
+/// If the specified PHI node (possibly via other PHI nodes) merges together the
+/// same or identical (i.e. Instruction::isIdenticalTo() returns true) values,
+/// return one of the values, otherwise return null.
+Value *PHINode::hasIdenticalValue() {
+ std::vector<PHINode *> Worklist;
+ std::set<PHINode *> Seen;
+ Value *Result = nullptr;
+ Worklist.push_back(this);
+ while (!Worklist.empty()) {
+ PHINode *PN = Worklist.back();
+ Worklist.pop_back();
+ if (!Seen.insert(PN).second)
+ continue;
+ for (Value *V : PN->incoming_values()) {
+ if (auto *IncomingPN = dyn_cast<PHINode>(V)) {
+ Worklist.push_back(IncomingPN);
+ continue;
+ }
+ if (!Result) {
+ Result = V;
+ continue;
+ }
+ if (V == Result)
+ continue;
+ if (auto *I = dyn_cast<Instruction>(V))
+ if (auto *ResultI = dyn_cast<Instruction>(Result))
+ if (I->isIdenticalTo(ResultI))
+ continue;
+ return nullptr;
+ }
+ }
+ return Result;
+}
+
//===----------------------------------------------------------------------===//
// LandingPadInst Implementation
//===----------------------------------------------------------------------===//
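The traversal above is a standard worklist walk with a Seen set, so it
terminates even on cyclic PHI graphs such as loop-header PHIs that feed
themselves. A self-contained toy analogue of the same algorithm
(illustrative only; plain value equality stands in for both the pointer
comparison and Instruction::isIdenticalTo()):

  #include <set>
  #include <vector>

  // Nodes are either "phi" nodes that merge inputs or leaves carrying a
  // value. Returns the unique leaf value reachable through phis, or
  // nullptr if two leaves disagree.
  struct Node {
    bool IsPhi = false;
    int Leaf = 0;
    std::vector<Node *> In;
  };

  const int *uniqueLeaf(Node *Root) {
    std::vector<Node *> Worklist{Root};
    std::set<Node *> Seen;
    const int *Result = nullptr;
    while (!Worklist.empty()) {
      Node *N = Worklist.back();
      Worklist.pop_back();
      if (!Seen.insert(N).second)
        continue; // already visited: this is what makes cycles terminate
      for (Node *V : N->In) {
        if (V->IsPhi) {
          Worklist.push_back(V); // look through nested phis
          continue;
        }
        if (!Result) {
          Result = &V->Leaf; // first leaf becomes the candidate
          continue;
        }
        if (V->Leaf != *Result)
          return nullptr; // a second, different leaf: no single value
      }
    }
    return Result;
  }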
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 9864adc..5baf5e3 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -7132,10 +7132,8 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
using namespace llvm::PatternMatch;
FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType());
- if (!VTy)
- return false;
- if (I->getOpcode() == Instruction::Mul &&
+ if (VTy && I->getOpcode() == Instruction::Mul &&
VTy->getElementType()->isIntegerTy(64)) {
for (auto &Op : I->operands()) {
// Make sure we are not already sinking this operand
@@ -7159,9 +7157,6 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
return !Ops.empty();
}
- // A uniform shift amount in a vector shift or funnel shift may be much
- // cheaper than a generic variable vector shift, so make that pattern visible
- // to SDAG by sinking the shuffle instruction next to the shift.
int ShiftAmountOpNum = -1;
if (I->isShift())
ShiftAmountOpNum = 1;
@@ -7170,16 +7165,38 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
II->getIntrinsicID() == Intrinsic::fshr)
ShiftAmountOpNum = 2;
}
-
if (ShiftAmountOpNum == -1)
return false;
+ auto *ShiftAmountUse = &I->getOperandUse(ShiftAmountOpNum);
+
+ Value *ShiftAmount = ShiftAmountUse->get();
+ if (auto *PN = dyn_cast<PHINode>(ShiftAmount)) {
+ ShiftAmount = PN->hasIdenticalValue();
+ if (!ShiftAmount)
+ return false;
+ }
- auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum));
+ // A uniform shift amount in a vector shift or funnel shift may be much
+ // cheaper than a generic variable vector shift, so make that pattern visible
+ // to SDAG by sinking the shuffle instruction next to the shift.
+ auto *Shuf = dyn_cast<ShuffleVectorInst>(ShiftAmount);
if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
isVectorShiftByScalarCheap(I->getType())) {
- Ops.push_back(&I->getOperandUse(ShiftAmountOpNum));
+ Ops.push_back(ShiftAmountUse);
return true;
}
+ // Casts of a constant expression (generally derived from a global
+ // variable address) are profitable to sink: they appear as
+ // subexpressions in the sequence generated by LowerTypeTests, and
+ // sinking them next to the shift lets SDAG fold the constant into the
+ // rotate instruction's immediate operand.
+ if (auto *CI = dyn_cast<CastInst>(ShiftAmount)) {
+ if (isa<ConstantExpr>(CI->getOperand(0))) {
+ Ops.push_back(ShiftAmountUse);
+ return true;
+ }
+ }
+
return false;
}
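Restated as a single predicate, the new shift-amount logic matches two
patterns. This is a sketch using only APIs that appear in the patch;
isProfitableShiftAmount is a hypothetical name, and the target cost check
(isVectorShiftByScalarCheap) and the Ops bookkeeping are deliberately
left out:

  #include "llvm/Analysis/VectorUtils.h" // getSplatIndex
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static bool isProfitableShiftAmount(Value *ShiftAmount) {
    // Look through a PHI that merges identical definitions.
    if (auto *PN = dyn_cast<PHINode>(ShiftAmount)) {
      ShiftAmount = PN->hasIdenticalValue();
      if (!ShiftAmount)
        return false;
    }
    // Pattern 1: a splat shuffle, i.e. a uniform shift amount that SDAG
    // can select as a cheaper vector shift by scalar.
    if (auto *Shuf = dyn_cast<ShuffleVectorInst>(ShiftAmount))
      return getSplatIndex(Shuf->getShuffleMask()) >= 0;
    // Pattern 2: a cast of a constant expression, the shape that
    // LowerTypeTests emits for rotate amounts from global addresses.
    if (auto *CI = dyn_cast<CastInst>(ShiftAmount))
      return isa<ConstantExpr>(CI->getOperand(0));
    return false;
  }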
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll
new file mode 100644
index 0000000..98f71cd
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll
@@ -0,0 +1,92 @@
+; Make sure that if a phi with identical incoming values gets created,
+; CodeGenPrepare undoes it.
+
+; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@__typeid__ZTS1S_global_addr = external hidden global [0 x i8], code_model "small"
+@__typeid__ZTS1S_align = external hidden global [0 x i8], !absolute_symbol !0
+@__typeid__ZTS1S_size_m1 = external hidden global [0 x i8], !absolute_symbol !1
+
+; Check that we recover the third pair of zexts from the phi.
+
+; CHECK: define void @f4
+define void @f4(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2, ptr noundef %3) {
+ br i1 %0, label %5, label %18
+
+5:
+ %6 = load ptr, ptr %1, align 8
+ %7 = ptrtoint ptr %6 to i64
+ %8 = sub i64 %7, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %9 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+ %10 = lshr i64 %8, %9
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %11 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+ %12 = shl i64 %8, %11
+ %13 = or i64 %10, %12
+ %14 = icmp ugt i64 %13, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %14, label %15, label %16
+
+15:
+ tail call void @llvm.ubsantrap(i8 2)
+ unreachable
+
+16:
+ %17 = load ptr, ptr %6, align 8
+ tail call void %17(ptr noundef nonnull align 8 dereferenceable(8) %1)
+ br label %31
+
+18:
+ %19 = load ptr, ptr %2, align 8
+ %20 = ptrtoint ptr %19 to i64
+ %21 = sub i64 %20, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %22 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+ %23 = lshr i64 %21, %22
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %24 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+ %25 = shl i64 %21, %24
+ %26 = or i64 %23, %25
+ %27 = icmp ugt i64 %26, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %27, label %28, label %29
+
+28:
+ tail call void @llvm.ubsantrap(i8 2)
+ unreachable
+
+29:
+ %30 = load ptr, ptr %19, align 8
+ tail call void %30(ptr noundef nonnull align 8 dereferenceable(8) %2)
+ br label %31
+
+31:
+ %32 = phi i64 [ %24, %29 ], [ %11, %16 ]
+ %33 = phi i64 [ %22, %29 ], [ %9, %16 ]
+ %34 = load ptr, ptr %3, align 8
+ %35 = ptrtoint ptr %34 to i64
+ %36 = sub i64 %35, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %37 = lshr i64 %36, %33
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %38 = shl i64 %36, %32
+ %39 = or i64 %37, %38
+ %40 = icmp ugt i64 %39, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %40, label %41, label %42
+
+41:
+ tail call void @llvm.ubsantrap(i8 2)
+ unreachable
+
+42:
+ %43 = load ptr, ptr %34, align 8
+ tail call void %43(ptr noundef nonnull align 8 dereferenceable(8) %3)
+ ret void
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.ubsantrap(i8 immarg)
+
+!0 = !{i64 0, i64 256}
+!1 = !{i64 0, i64 128}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist.ll b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist.ll
new file mode 100644
index 0000000..301f2248
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist.ll
@@ -0,0 +1,178 @@
+; Make sure that if optimizations hoist or CSE a zext of a constant,
+; CodeGenPrepare undoes it.
+
+; This IR is normally generated by LowerTypeTests during ThinLTO importing
+; so it will go through the ThinLTO pass pipeline.
+; RUN: opt -passes='thinlto<O0>' -S < %s | opt -S -passes='require<profile-summary>,function(codegenprepare)' | FileCheck %s
+; RUN: opt -passes='thinlto<O1>' -S < %s | opt -S -passes='require<profile-summary>,function(codegenprepare)' | FileCheck %s
+; RUN: opt -passes='thinlto<O2>' -S < %s | opt -S -passes='require<profile-summary>,function(codegenprepare)' | FileCheck %s
+; RUN: opt -passes='thinlto<O3>' -S < %s | opt -S -passes='require<profile-summary>,function(codegenprepare)' | FileCheck %s
+
+; Also check the regular pipelines for completeness.
+; RUN: opt -O0 -S < %s | opt -S -passes='require<profile-summary>,function(codegenprepare)' | FileCheck %s
+; RUN: opt -O1 -S < %s | opt -S -passes='require<profile-summary>,function(codegenprepare)' | FileCheck %s
+; RUN: opt -O2 -S < %s | opt -S -passes='require<profile-summary>,function(codegenprepare)' | FileCheck %s
+; RUN: opt -O3 -S < %s | opt -S -passes='require<profile-summary>,function(codegenprepare)' | FileCheck %s
+; RUN: opt -Os -S < %s | opt -S -passes='require<profile-summary>,function(codegenprepare)' | FileCheck %s
+; RUN: opt -Oz -S < %s | opt -S -passes='require<profile-summary>,function(codegenprepare)' | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@__typeid__ZTS1S_global_addr = external hidden global [0 x i8], code_model "small"
+@__typeid__ZTS1S_align = external hidden global [0 x i8], !absolute_symbol !0
+@__typeid__ZTS1S_size_m1 = external hidden global [0 x i8], !absolute_symbol !1
+
+; Check that we still have two pairs of zexts (non-dominating case).
+
+; CHECK: define void @f1
+define void @f1(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2) {
+ br i1 %0, label %4, label %17
+
+4:
+ %5 = load ptr, ptr %1, align 8
+ %6 = ptrtoint ptr %5 to i64
+ %7 = sub i64 %6, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %8 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+ %9 = lshr i64 %7, %8
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %10 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+ %11 = shl i64 %7, %10
+ %12 = or i64 %9, %11
+ %13 = icmp ule i64 %12, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %13, label %15, label %14
+
+14:
+ call void @llvm.ubsantrap(i8 2)
+ unreachable
+
+15:
+ %16 = load ptr, ptr %5, align 8
+ call void %16(ptr noundef nonnull align 8 dereferenceable(8) %1)
+ br label %30
+
+17:
+ %18 = load ptr, ptr %2, align 8
+ %19 = ptrtoint ptr %18 to i64
+ %20 = sub i64 %19, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %21 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+ %22 = lshr i64 %20, %21
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %23 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+ %24 = shl i64 %20, %23
+ %25 = or i64 %22, %24
+ %26 = icmp ule i64 %25, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %26, label %28, label %27
+
+27:
+ call void @llvm.ubsantrap(i8 2)
+ unreachable
+
+28:
+ %29 = load ptr, ptr %18, align 8
+ call void %29(ptr noundef nonnull align 8 dereferenceable(8) %2)
+ br label %30
+
+30:
+ ret void
+}
+
+; Check that we still have two pairs of zexts (dominating case).
+
+; CHECK: define void @f2
+define void @f2(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2) {
+ %4 = load ptr, ptr %1, align 8
+ %5 = ptrtoint ptr %4 to i64
+ %6 = sub i64 %5, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %7 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+ %8 = lshr i64 %6, %7
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %9 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+ %10 = shl i64 %6, %9
+ %11 = or i64 %8, %10
+ %12 = icmp ule i64 %11, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %12, label %14, label %13
+
+13: ; preds = %3
+ call void @llvm.ubsantrap(i8 2)
+ unreachable
+
+14: ; preds = %3
+ %15 = load ptr, ptr %4, align 8
+ call void %15(ptr noundef nonnull align 8 dereferenceable(8) %1)
+ br i1 %0, label %16, label %29
+
+16: ; preds = %14
+ %17 = load ptr, ptr %2, align 8
+ %18 = ptrtoint ptr %17 to i64
+ %19 = sub i64 %18, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %20 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+ %21 = lshr i64 %19, %20
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %22 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+ %23 = shl i64 %19, %22
+ %24 = or i64 %21, %23
+ %25 = icmp ule i64 %24, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %25, label %27, label %26
+
+26: ; preds = %16
+ call void @llvm.ubsantrap(i8 2)
+ unreachable
+
+27: ; preds = %16
+ %28 = load ptr, ptr %17, align 8
+ call void %28(ptr noundef nonnull align 8 dereferenceable(8) %2)
+ br label %29
+
+29: ; preds = %27, %14
+ ret void
+}
+
+; Check that the zexts aren't moved to the preheader (or anywhere else)
+; and stay in the same basic block.
+
+; CHECK: define void @f3
+define void @f3(ptr noundef readonly captures(address) %0, ptr noundef readnone captures(address) %1) {
+ %3 = icmp eq ptr %0, %1
+ br i1 %3, label %21, label %4
+
+4:
+ ; CHECK: = phi
+ %5 = phi ptr [ %19, %17 ], [ %0, %2 ]
+ %6 = load ptr, ptr %5, align 8
+ %7 = load ptr, ptr %6, align 8
+ %8 = ptrtoint ptr %7 to i64
+ %9 = sub i64 %8, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %10 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+ %11 = lshr i64 %9, %10
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %12 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+ %13 = shl i64 %9, %12
+ %14 = or i64 %11, %13
+ %15 = icmp ule i64 %14, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %15, label %17, label %16
+
+16:
+ call void @llvm.ubsantrap(i8 2)
+ unreachable
+
+17:
+ %18 = load ptr, ptr %7, align 8
+ call void %18(ptr noundef nonnull align 8 dereferenceable(8) %6)
+ %19 = getelementptr inbounds nuw i8, ptr %5, i64 8
+ %20 = icmp eq ptr %19, %1
+ br i1 %20, label %21, label %4
+
+21:
+ ret void
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.ubsantrap(i8 immarg)
+
+!0 = !{i64 0, i64 256}
+!1 = !{i64 0, i64 128}