-rw-r--r--  llvm/include/llvm/IR/Instructions.h                                        |   5
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp                                        |  17
-rw-r--r--  llvm/lib/IR/Instructions.cpp                                               |  35
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp                             |  35
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll |  92
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist.ll    | 178
6 files changed, 349 insertions(+), 13 deletions(-)
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index c164f76..e822903 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -2822,6 +2822,11 @@ public:
   /// non-undef value.
   bool hasConstantOrUndefValue() const;
 
+  /// If the specified PHI node (possibly via other PHI nodes) merges together
+  /// the same or identical (i.e. Instruction::isIdenticalTo() returns true)
+  /// values, return one of the values, otherwise return null.
+  Value *hasIdenticalValue();
+
   /// If the PHI node is complete which means all of its parent's predecessors
   /// have incoming value in this PHI, return true, otherwise return false.
   bool isComplete() const {
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 5226302..422916a 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7739,9 +7739,14 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
 
   for (Use *U : reverse(OpsToSink)) {
     auto *UI = cast<Instruction>(U->get());
-    if (isa<PHINode>(UI))
-      continue;
-    if (UI->getParent() == TargetBB) {
+    if (auto *PN = dyn_cast<PHINode>(UI)) {
+      auto *I0 = dyn_cast_or_null<Instruction>(PN->hasIdenticalValue());
+      if (!I0)
+        continue;
+      if (I0->getParent() == TargetBB &&
+          InstOrdering[I0] < InstOrdering[InsertPoint])
+        InsertPoint = I0;
+    } else if (UI->getParent() == TargetBB) {
       if (InstOrdering[UI] < InstOrdering[InsertPoint])
         InsertPoint = UI;
       continue;
@@ -7753,7 +7758,11 @@
   DenseMap<Instruction *, Instruction *> NewInstructions;
   for (Use *U : ToReplace) {
     auto *UI = cast<Instruction>(U->get());
-    Instruction *NI = UI->clone();
+    Instruction *NI;
+    if (auto *PN = dyn_cast<PHINode>(UI))
+      NI = cast<Instruction>(PN->hasIdenticalValue())->clone();
+    else
+      NI = UI->clone();
 
     if (IsHugeFunc) {
       // Now we clone an instruction, its operands' defs may sink to this BB
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index f404e11..9c59378 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -48,6 +48,7 @@
 #include <cassert>
 #include <cstdint>
 #include <optional>
+#include <set>
 #include <vector>
 
 using namespace llvm;
@@ -239,6 +240,40 @@ bool PHINode::hasConstantOrUndefValue() const {
   return true;
 }
 
+/// If the specified PHI node (possibly via other PHI nodes) merges together the
+/// same or identical (i.e. Instruction::isIdenticalTo() returns true) values,
+/// return one of the values, otherwise return null.
+Value *PHINode::hasIdenticalValue() {
+  std::vector<PHINode *> Worklist;
+  std::set<PHINode *> Seen;
+  Value *Result = nullptr;
+  Worklist.push_back(this);
+  while (!Worklist.empty()) {
+    PHINode *PN = Worklist.back();
+    Worklist.pop_back();
+    if (!Seen.insert(PN).second)
+      continue;
+    for (Value *V : PN->incoming_values()) {
+      if (auto *PN = dyn_cast<PHINode>(V)) {
+        Worklist.push_back(PN);
+        continue;
+      }
+      if (!Result) {
+        Result = V;
+        continue;
+      }
+      if (V == Result)
+        continue;
+      if (auto *I = dyn_cast<Instruction>(V))
+        if (auto *ResultI = dyn_cast<Instruction>(Result))
+          if (I->isIdenticalTo(ResultI))
+            continue;
+      return nullptr;
+    }
+  }
+  return Result;
+}
+
 //===----------------------------------------------------------------------===//
 //                        LandingPadInst Implementation
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 9864adc..5baf5e3 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -7132,10 +7132,8 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
   using namespace llvm::PatternMatch;
 
   FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType());
-  if (!VTy)
-    return false;
 
-  if (I->getOpcode() == Instruction::Mul &&
+  if (VTy && I->getOpcode() == Instruction::Mul &&
       VTy->getElementType()->isIntegerTy(64)) {
     for (auto &Op : I->operands()) {
       // Make sure we are not already sinking this operand
@@ -7159,9 +7157,6 @@
     return !Ops.empty();
   }
 
-  // A uniform shift amount in a vector shift or funnel shift may be much
-  // cheaper than a generic variable vector shift, so make that pattern visible
-  // to SDAG by sinking the shuffle instruction next to the shift.
   int ShiftAmountOpNum = -1;
   if (I->isShift())
     ShiftAmountOpNum = 1;
@@ -7170,16 +7165,38 @@
         II->getIntrinsicID() == Intrinsic::fshr)
       ShiftAmountOpNum = 2;
   }
-
   if (ShiftAmountOpNum == -1)
     return false;
+  auto *ShiftAmountUse = &I->getOperandUse(ShiftAmountOpNum);
+
+  Value *ShiftAmount = ShiftAmountUse->get();
+  if (auto *PN = dyn_cast<PHINode>(ShiftAmount)) {
+    ShiftAmount = PN->hasIdenticalValue();
+    if (!ShiftAmount)
+      return false;
+  }
 
-  auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum));
+  // A uniform shift amount in a vector shift or funnel shift may be much
+  // cheaper than a generic variable vector shift, so make that pattern visible
+  // to SDAG by sinking the shuffle instruction next to the shift.
+  auto *Shuf = dyn_cast<ShuffleVectorInst>(ShiftAmount);
   if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
       isVectorShiftByScalarCheap(I->getType())) {
-    Ops.push_back(&I->getOperandUse(ShiftAmountOpNum));
+    Ops.push_back(ShiftAmountUse);
     return true;
   }
 
+  // Casts taking a constant expression (generally derived from a global
+  // variable address) as an operand are profitable to sink because they appear
+  // as subexpressions in the instruction sequence generated by the
+  // LowerTypeTests pass which is expected to pattern match to the rotate
+  // instruction's immediate operand.
+  if (auto *CI = dyn_cast<CastInst>(ShiftAmount)) {
+    if (isa<ConstantExpr>(CI->getOperand(0))) {
+      Ops.push_back(ShiftAmountUse);
+      return true;
+    }
+  }
+
   return false;
 }
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll
new file mode 100644
index 0000000..98f71cd
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll
@@ -0,0 +1,92 @@
+; Make sure that if a phi with identical inputs gets created it gets undone by CodeGenPrepare.
+
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@__typeid__ZTS1S_global_addr = external hidden global [0 x i8], code_model "small"
+@__typeid__ZTS1S_align = external hidden global [0 x i8], !absolute_symbol !0
+@__typeid__ZTS1S_size_m1 = external hidden global [0 x i8], !absolute_symbol !1
+
+; Check that we recover the third pair of zexts from the phi.
+
+; CHECK: define void @f4
+define void @f4(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2, ptr noundef %3) #1 {
+  br i1 %0, label %5, label %18
+
+5:
+  %6 = load ptr, ptr %1, align 8
+  %7 = ptrtoint ptr %6 to i64
+  %8 = sub i64 %7, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %9 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+  %10 = lshr i64 %8, %9
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %11 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+  %12 = shl i64 %8, %11
+  %13 = or i64 %10, %12
+  %14 = icmp ugt i64 %13, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+  br i1 %14, label %15, label %16
+
+15:
+  tail call void @llvm.ubsantrap(i8 2) #5
+  unreachable
+
+16:
+  %17 = load ptr, ptr %6, align 8
+  tail call void %17(ptr noundef nonnull align 8 dereferenceable(8) %1)
+  br label %31
+
+18:
+  %19 = load ptr, ptr %2, align 8
+  %20 = ptrtoint ptr %19 to i64
+  %21 = sub i64 %20, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %22 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+  %23 = lshr i64 %21, %22
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %24 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+  %25 = shl i64 %21, %24
+  %26 = or i64 %23, %25
+  %27 = icmp ugt i64 %26, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+  br i1 %27, label %28, label %29
+
+28:
+  tail call void @llvm.ubsantrap(i8 2) #5
+  unreachable
+
+29:
+  %30 = load ptr, ptr %19, align 8
+  tail call void %30(ptr noundef nonnull align 8 dereferenceable(8) %2)
+  br label %31
+
+31:
+  %32 = phi i64 [ %24, %29 ], [ %11, %16 ]
+  %33 = phi i64 [ %22, %29 ], [ %9, %16 ]
+  %34 = load ptr, ptr %3, align 8
+  %35 = ptrtoint ptr %34 to i64
+  %36 = sub i64 %35, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %37 = lshr i64 %36, %33
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %38 = shl i64 %36, %32
+  %39 = or i64 %37, %38
+  %40 = icmp ugt i64 %39, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+  br i1 %40, label %41, label %42
+
+41:
+  tail call void @llvm.ubsantrap(i8 2) #5
+  unreachable
+
+42:
+  %43 = load ptr, ptr %34, align 8
+  tail call void %43(ptr noundef nonnull align 8 dereferenceable(8) %3)
+  ret void
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.ubsantrap(i8 immarg)
+
+!0 = !{i64 0, i64 256}
+!1 = !{i64 0, i64 128}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist.ll b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist.ll
new file mode 100644
index 0000000..301f2248
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist.ll
@@ -0,0 +1,178 @@
+; Make sure that if optimizations hoist or CSE zext(const) it gets undone by CodeGenPrepare.
+
+; This IR is normally generated by LowerTypeTests during ThinLTO importing
+; so it will go through the ThinLTO pass pipeline.
+; RUN: opt -passes='thinlto<O0>' -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -passes='thinlto<O1>' -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -passes='thinlto<O2>' -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -passes='thinlto<O3>' -S < %s | opt -codegenprepare -S | FileCheck %s
+
+; Also check the regular pipelines for completeness.
+; RUN: opt -O0 -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -O1 -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -O2 -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -O3 -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -Os -S < %s | opt -codegenprepare -S | FileCheck %s
+; RUN: opt -Oz -S < %s | opt -codegenprepare -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@__typeid__ZTS1S_global_addr = external hidden global [0 x i8], code_model "small"
+@__typeid__ZTS1S_align = external hidden global [0 x i8], !absolute_symbol !0
+@__typeid__ZTS1S_size_m1 = external hidden global [0 x i8], !absolute_symbol !1
+
+; Check that we still have two pairs of zexts (non dominating case).
+
+; CHECK: define void @f1
+define void @f1(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2) {
+  br i1 %0, label %4, label %17
+
+4:
+  %5 = load ptr, ptr %1, align 8
+  %6 = ptrtoint ptr %5 to i64
+  %7 = sub i64 %6, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %8 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+  %9 = lshr i64 %7, %8
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %10 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+  %11 = shl i64 %7, %10
+  %12 = or i64 %9, %11
+  %13 = icmp ule i64 %12, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+  br i1 %13, label %15, label %14
+
+14:
+  call void @llvm.ubsantrap(i8 2)
+  unreachable
+
+15:
+  %16 = load ptr, ptr %5, align 8
+  call void %16(ptr noundef nonnull align 8 dereferenceable(8) %1)
+  br label %30
+
+17:
+  %18 = load ptr, ptr %2, align 8
+  %19 = ptrtoint ptr %18 to i64
+  %20 = sub i64 %19, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %21 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+  %22 = lshr i64 %20, %21
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %23 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+  %24 = shl i64 %20, %23
+  %25 = or i64 %22, %24
+  %26 = icmp ule i64 %25, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+  br i1 %26, label %28, label %27
+
+27:
+  call void @llvm.ubsantrap(i8 2) #6
+  unreachable
+
+28:
+  %29 = load ptr, ptr %18, align 8
+  call void %29(ptr noundef nonnull align 8 dereferenceable(8) %2)
+  br label %30
+
+30:
+  ret void
+}
+
+; Check that we still have two pairs of zexts (dominating case).
+
+; CHECK: define void @f2
+define void @f2(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2) {
+  %4 = load ptr, ptr %1, align 8
+  %5 = ptrtoint ptr %4 to i64
+  %6 = sub i64 %5, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %7 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+  %8 = lshr i64 %6, %7
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %9 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+  %10 = shl i64 %6, %9
+  %11 = or i64 %8, %10
+  %12 = icmp ule i64 %11, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+  br i1 %12, label %14, label %13
+
+13:                                               ; preds = %3
+  call void @llvm.ubsantrap(i8 2)
+  unreachable
+
+14:                                               ; preds = %3
+  %15 = load ptr, ptr %4, align 8
+  call void %15(ptr noundef nonnull align 8 dereferenceable(8) %1)
+  br i1 %0, label %16, label %29
+
+16:                                               ; preds = %14
+  %17 = load ptr, ptr %2, align 8
+  %18 = ptrtoint ptr %17 to i64
+  %19 = sub i64 %18, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %20 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+  %21 = lshr i64 %19, %20
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %22 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+  %23 = shl i64 %19, %22
+  %24 = or i64 %21, %23
+  %25 = icmp ule i64 %24, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+  br i1 %25, label %27, label %26
+
+26:                                               ; preds = %16
+  call void @llvm.ubsantrap(i8 2) #6
+  unreachable
+
+27:                                               ; preds = %16
+  %28 = load ptr, ptr %17, align 8
+  call void %28(ptr noundef nonnull align 8 dereferenceable(8) %2)
+  br label %29
+
+29:                                               ; preds = %27, %14
+  ret void
+}
+
+; Check that the zexts aren't moved to the preheader (or anywhere else)
+; and stay in the same basic block.
+
+; CHECK: define void @f3
+define void @f3(ptr noundef readonly captures(address) %0, ptr noundef readnone captures(address) %1) {
+  %3 = icmp eq ptr %0, %1
+  br i1 %3, label %21, label %4
+
+4:
+  ; CHECK: = phi
+  %5 = phi ptr [ %19, %17 ], [ %0, %2 ]
+  %6 = load ptr, ptr %5, align 8
+  %7 = load ptr, ptr %6, align 8
+  %8 = ptrtoint ptr %7 to i64
+  %9 = sub i64 %8, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %10 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+  %11 = lshr i64 %9, %10
+  ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+  %12 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+  %13 = shl i64 %9, %12
+  %14 = or i64 %11, %13
+  %15 = icmp ule i64 %14, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+  br i1 %15, label %17, label %16
+
+16:
+  call void @llvm.ubsantrap(i8 2)
+  unreachable
+
+17:
+  %18 = load ptr, ptr %7, align 8
+  call void %18(ptr noundef nonnull align 8 dereferenceable(8) %6)
+  %19 = getelementptr inbounds nuw i8, ptr %5, i64 8
+  %20 = icmp eq ptr %19, %1
+  br i1 %20, label %21, label %4
+
+21:
+  ret void
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.ubsantrap(i8 immarg)
+
+!0 = !{i64 0, i64 256}
+!1 = !{i64 0, i64 128}
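
A minimal illustration of the PHINode::hasIdenticalValue() contract introduced above (a standalone sketch, not part of the patch; the names @g and @phi_example are invented): the two incoming values of %s are distinct zext instructions that Instruction::isIdenticalTo() treats as identical, so hasIdenticalValue() called on %s returns one of them, which lets the sinking code in CodeGenPrepare and X86TTIImpl look through the PHI. If block %b computed a different value, the call would return null and the operand would not be sunk.

@g = external global [0 x i8]

define i64 @phi_example(i1 %c, i64 %x) {
entry:
  br i1 %c, label %a, label %b

a:
  ; one copy of the zext of a constant expression
  %za = zext i8 ptrtoint (ptr @g to i8) to i64
  br label %join

b:
  ; a second, textually identical copy in another block
  %zb = zext i8 ptrtoint (ptr @g to i8) to i64
  br label %join

join:
  ; %za and %zb satisfy Instruction::isIdenticalTo(), so
  ; hasIdenticalValue() on %s returns one of them instead of null.
  %s = phi i64 [ %za, %a ], [ %zb, %b ]
  %r = lshr i64 %x, %s
  ret i64 %r
}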