aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils/CloneFunction.cpp
diff options
context:
space:
mode:
authorSerge Pavlov <sepavloff@gmail.com>2019-11-01 20:49:35 +0700
committerSerge Pavlov <sepavloff@gmail.com>2022-03-31 19:15:52 +0700
commit47b3b76825dc89d4ee37408f26b458f61f86fbf5 (patch)
tree5e6584bf65150bfeea7a45d09fb485633d3327eb /llvm/lib/Transforms/Utils/CloneFunction.cpp
parentb4417075dc1cbfac0a3f777850ba77c031d7db3c (diff)
downloadllvm-47b3b76825dc89d4ee37408f26b458f61f86fbf5.zip
llvm-47b3b76825dc89d4ee37408f26b458f61f86fbf5.tar.gz
llvm-47b3b76825dc89d4ee37408f26b458f61f86fbf5.tar.bz2
Implement inlining of strictfp functions
According to the current design, if a floating point operation is represented by a constrained intrinsic somewhere in a function, all floating point operations in that function must be represented by constrained intrinsics. This imposes additional requirements on the inlining mechanism. If a non-strictfp function is inlined into a strictfp function, all ordinary FP operations must be replaced with their constrained counterparts. Inlining a strictfp function into a non-strictfp function is not implemented, as it would require replacing all FP operations in the host function, which is currently undesirable due to the expected performance loss. Differential Revision: https://reviews.llvm.org/D69798
Diffstat (limited to 'llvm/lib/Transforms/Utils/CloneFunction.cpp')
-rw-r--r--llvm/lib/Transforms/Utils/CloneFunction.cpp100
1 files changed, 98 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index db252b3..91b28d9 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -322,6 +322,9 @@ struct PruningFunctionCloner {
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
+ bool HostFuncIsStrictFP;
+
+ Instruction *cloneInstruction(BasicBlock::const_iterator II);
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
@@ -329,7 +332,10 @@ public:
const char *nameSuffix, ClonedCodeInfo *codeInfo)
: NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
- CodeInfo(codeInfo) {}
+ CodeInfo(codeInfo) {
+ HostFuncIsStrictFP =
+ newFunc->getAttributes().hasFnAttr(Attribute::StrictFP);
+ }
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
@@ -338,6 +344,89 @@ public:
};
} // namespace
+/// Report whether the constrained intrinsic \p CIID is marked as having a
+/// rounding mode operand.
+///
+/// The answer is taken from the ROUND_MODE field of the matching entry in
+/// llvm/IR/ConstrainedOps.def: the function returns true when that field is 1.
+/// An id that matches none of the generated cases reaches llvm_unreachable, so
+/// callers must pass only ids of constrained intrinsics.
+static bool hasRoundingModeOperand(Intrinsic::ID CIID) {
+ switch (CIID) {
+ // Every INSTRUCTION/FUNCTION entry expanded from the included .def file
+ // becomes one `case` label that returns that entry's rounding-mode flag.
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+ case Intrinsic::INTRINSIC: \
+ return ROUND_MODE == 1;
+#define FUNCTION INSTRUCTION
+#include "llvm/IR/ConstrainedOps.def"
+ default:
+ llvm_unreachable("Unexpected constrained intrinsic id");
+ }
+}
+
+/// Produce the instruction that stands for \p II in the function being built.
+///
+/// When HostFuncIsStrictFP is set and getConstrainedIntrinsicID() maps the
+/// original instruction to a constrained intrinsic, a call to that intrinsic
+/// is created instead of a plain copy. In every other case the result is
+/// II->clone().
+///
+/// The created call receives the operands of the original instruction as they
+/// are; no value remapping is done here.
+///
+/// \param II iterator pointing at the instruction to reproduce.
+/// \returns the newly created instruction (either the intrinsic call or the
+/// clone).
+Instruction *
+PruningFunctionCloner::cloneInstruction(BasicBlock::const_iterator II) {
+ const Instruction &OldInst = *II;
+ Instruction *NewInst = nullptr;
+ if (HostFuncIsStrictFP) {
+ Intrinsic::ID CIID = getConstrainedIntrinsicID(OldInst);
+ if (CIID != Intrinsic::not_intrinsic) {
+ // Instead of cloning the instruction, a call to the constrained intrinsic
+ // is created.
+ // This assumes that the leading arguments of the constrained intrinsic
+ // are the same as the operands of the original instruction.
+
+ // Determine the overloaded types of the intrinsic by walking its
+ // descriptor table: entry 0 is matched against the result type of the
+ // original instruction, entry I (I > 0) against operand I - 1.
+ SmallVector<Type *, 2> TParams;
+ SmallVector<Intrinsic::IITDescriptor, 8> Descriptor;
+ getIntrinsicInfoTableEntries(CIID, Descriptor);
+ for (unsigned I = 0, E = Descriptor.size(); I != E; ++I) {
+ Intrinsic::IITDescriptor Operand = Descriptor[I];
+ switch (Operand.Kind) {
+ case Intrinsic::IITDescriptor::Argument:
+ // Only arguments that are not AK_MatchType contribute a type
+ // parameter.
+ if (Operand.getArgumentKind() !=
+ Intrinsic::IITDescriptor::AK_MatchType) {
+ if (I == 0)
+ TParams.push_back(OldInst.getType());
+ else
+ TParams.push_back(OldInst.getOperand(I - 1)->getType());
+ }
+ break;
+ case Intrinsic::IITDescriptor::SameVecWidthArgument:
+ // Skip the descriptor entry that follows this one.
+ // NOTE(review): presumably that entry belongs to the same argument
+ // (its element type) - confirm against the descriptor encoding.
+ ++I;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Create the intrinsic call.
+ LLVMContext &Ctx = NewFunc->getContext();
+ Function *IFn =
+ Intrinsic::getDeclaration(NewFunc->getParent(), CIID, TParams);
+ SmallVector<Value *, 4> Args;
+ unsigned NumOperands = OldInst.getNumOperands();
+ // For a call, the last operand is not forwarded to the intrinsic.
+ // NOTE(review): presumably this is the callee operand - confirm.
+ if (isa<CallInst>(OldInst))
+ --NumOperands;
+ for (unsigned I = 0; I < NumOperands; ++I) {
+ Value *Op = OldInst.getOperand(I);
+ Args.push_back(Op);
+ }
+ // A floating point comparison passes its predicate as an extra metadata
+ // string argument, placed right after the value operands.
+ if (const auto *CmpI = dyn_cast<FCmpInst>(&OldInst)) {
+ FCmpInst::Predicate Pred = CmpI->getPredicate();
+ StringRef PredName = FCmpInst::getPredicateName(Pred);
+ Args.push_back(MetadataAsValue::get(Ctx, MDString::get(Ctx, PredName)));
+ }
+
+ // The last arguments of a constrained intrinsic are metadata that
+ // represent the rounding mode (absent in some intrinsics) and the
+ // exception behavior. The inlined function uses the default settings.
+ if (hasRoundingModeOperand(CIID))
+ Args.push_back(
+ MetadataAsValue::get(Ctx, MDString::get(Ctx, "round.tonearest")));
+ Args.push_back(
+ MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.ignore")));
+
+ NewInst = CallInst::Create(IFn, Args, OldInst.getName() + ".strict");
+ }
+ }
+ // No constrained replacement was created: fall back to an ordinary clone.
+ if (!NewInst)
+ NewInst = II->clone();
+ return NewInst;
+}
+
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(
@@ -377,7 +466,14 @@ void PruningFunctionCloner::CloneBlock(
for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE;
++II) {
- Instruction *NewInst = II->clone();
+ Instruction *NewInst = cloneInstruction(II);
+
+ if (HostFuncIsStrictFP) {
+ // All function calls in the inlined function must get 'strictfp'
+ // attribute to prevent undesirable optimizations.
+ if (auto *Call = dyn_cast<CallInst>(NewInst))
+ Call->addFnAttr(Attribute::StrictFP);
+ }
// Eagerly remap operands to the newly cloned instruction, except for PHI
// nodes for which we defer processing until we update the CFG.