author     NAKAMURA Takumi <geek4civic@gmail.com>    2025-01-09 18:49:54 +0900
committer  NAKAMURA Takumi <geek4civic@gmail.com>    2025-01-09 18:49:54 +0900
commit     e2810c9a248f4c7fbfae84bb32b6f7e01027458b
tree       ae0b02a8491b969a1cee94ea16ffe42c559143c5 /llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
parent     fa04eb4af95c1ca7377279728cb004bcd2324d01
parent     bdcf47e4bcb92889665825654bb80a8bbe30379e
Merge branch 'users/chapuni/cov/single/base' into users/chapuni/cov/single/switch
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp')
-rw-r--r--   llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp   89
1 file changed, 81 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 0566a87..25b6731 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -35,6 +35,9 @@ using namespace llvm::PatternMatch;
 static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
                                                cl::init(true), cl::Hidden);
 
+static cl::opt<bool> SVEPreferFixedOverScalableIfEqualCost(
+    "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);
+
 static cl::opt<unsigned> SVEGatherOverhead("sve-gather-overhead", cl::init(10),
                                            cl::Hidden);
 
@@ -256,7 +259,7 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
     CalleeAttrs.set(SMEAttrs::SM_Enabled, true);
   }
 
-  if (CalleeAttrs.isNewZA())
+  if (CalleeAttrs.isNewZA() || CalleeAttrs.isNewZT0())
     return false;
 
   if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
@@ -1635,10 +1638,8 @@ instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
       !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                               m_ConstantInt<AArch64SVEPredPattern::all>())))
     return std::nullopt;
-  IRBuilderBase::FastMathFlagGuard FMFGuard(IC.Builder);
-  IC.Builder.setFastMathFlags(II.getFastMathFlags());
-  auto BinOp =
-      IC.Builder.CreateBinOp(BinOpCode, II.getOperand(1), II.getOperand(2));
+  auto BinOp = IC.Builder.CreateBinOpFMF(
+      BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());
   return IC.replaceInstUsesWith(II, BinOp);
 }
 
@@ -2760,6 +2761,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
     return AdjustCost(
         BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
 
+  static const TypeConversionCostTblEntry BF16Tbl[] = {
+      {ISD::FP_ROUND, MVT::bf16, MVT::f32, 1},     // bfcvt
+      {ISD::FP_ROUND, MVT::bf16, MVT::f64, 1},     // bfcvt
+      {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 1}, // bfcvtn
+      {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 2}, // bfcvtn+bfcvtn2
+      {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2}, // bfcvtn+fcvtn
+      {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3}, // fcvtn+fcvtl2+bfcvtn
+      {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+bfcvtn
+  };
+
+  if (ST->hasBF16())
+    if (const auto *Entry = ConvertCostTableLookup(
+            BF16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
+      return AdjustCost(Entry->Cost);
+
   static const TypeConversionCostTblEntry ConversionTbl[] = {
       {ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1},  // xtn
       {ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1}, // xtn
@@ -2847,6 +2863,14 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
       {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2}, // fcvtl+fcvtl
       {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3}, // fcvtl+fcvtl2+fcvtl
       {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6}, // 2 * fcvtl+fcvtl2+fcvtl
+      // BF16 (uses shift)
+      {ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1},     // shl
+      {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2},     // shl+fcvt
+      {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1}, // shll
+      {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2}, // shll+shll2
+      {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2}, // shll+fcvtl
+      {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3}, // shll+fcvtl+fcvtl2
+      {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6}, // 2 * shll+fcvtl+fcvtl2
       // FP Ext and trunc
       {ISD::FP_ROUND, MVT::f32, MVT::f64, 1},     // fcvt
       {ISD::FP_ROUND, MVT::v2f32, MVT::v2f64, 1}, // fcvtn
@@ -2859,6 +2883,15 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
       {ISD::FP_ROUND, MVT::v2f16, MVT::v2f64, 2}, // fcvtn+fcvtn
       {ISD::FP_ROUND, MVT::v4f16, MVT::v4f64, 3}, // fcvtn+fcvtn2+fcvtn
       {ISD::FP_ROUND, MVT::v8f16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+fcvtn
+      // BF16 (more complex; the +bf16 cases are handled above)
+      {ISD::FP_ROUND, MVT::bf16, MVT::f32, 8},      // expansion is ~8 insns
+      {ISD::FP_ROUND, MVT::bf16, MVT::f64, 9},      // fcvtn + above
+      {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f32, 8},
+      {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 8},
+      {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 15},
+      {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 9},
+      {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 10},
+      {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 19},
 
       // LowerVectorINT_TO_FP:
       {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
@@ -4705,10 +4738,21 @@ InstructionCost AArch64TTIImpl::getShuffleCost(
   }
   Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
 
-  // Treat extractsubvector as single op permutation.
   bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
-  if (IsExtractSubvector && LT.second.isFixedLengthVector())
+  // A subvector extract can be implemented with an ext (or a trivial extract,
+  // if from lane 0). This currently only handles low or high extracts to
+  // prevent SLP vectorizer regressions.
+  if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
+    if (LT.second.is128BitVector() &&
+        cast<FixedVectorType>(SubTp)->getNumElements() ==
+            LT.second.getVectorNumElements() / 2) {
+      if (Index == 0)
+        return 0;
+      if (Index == (int)LT.second.getVectorNumElements() / 2)
+        return 1;
+    }
     Kind = TTI::SK_PermuteSingleSrc;
+  }
 
   // Check for broadcast loads, which are supported by the LD1R instruction.
   // In terms of code-size, the shuffle vector is free when a load + dup get
@@ -4919,6 +4963,12 @@ static bool containsDecreasingPointers(Loop *TheLoop,
   return false;
 }
 
+bool AArch64TTIImpl::preferFixedOverScalableIfEqualCost() const {
+  if (SVEPreferFixedOverScalableIfEqualCost.getNumOccurrences())
+    return SVEPreferFixedOverScalableIfEqualCost;
+  return ST->useFixedOverScalableIfEqualCost();
+}
+
 unsigned AArch64TTIImpl::getEpilogueVectorizationMinVF() const {
   return ST->getEpilogueVectorizationMinVF();
 }
@@ -5283,11 +5333,17 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
     }
   }
 
-  // Sink vscales closer to uses for better isel
+  auto ShouldSinkCondition = [](Value *Cond) -> bool {
+    auto *II = dyn_cast<IntrinsicInst>(Cond);
+    return II && II->getIntrinsicID() == Intrinsic::vector_reduce_or &&
+           isa<ScalableVectorType>(II->getOperand(0)->getType());
+  };
+
   switch (I->getOpcode()) {
   case Instruction::GetElementPtr:
   case Instruction::Add:
   case Instruction::Sub:
+    // Sink vscales closer to uses for better isel
    for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
       if (shouldSinkVScale(I->getOperand(Op), Ops)) {
         Ops.push_back(&I->getOperandUse(Op));
@@ -5295,6 +5351,23 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
       }
     }
     break;
+  case Instruction::Select: {
+    if (!ShouldSinkCondition(I->getOperand(0)))
+      return false;
+
+    Ops.push_back(&I->getOperandUse(0));
+    return true;
+  }
+  case Instruction::Br: {
+    if (cast<BranchInst>(I)->isUnconditional())
+      return false;
+
+    if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition()))
+      return false;
+
+    Ops.push_back(&I->getOperandUse(0));
+    return true;
+  }
   default:
     break;
   }
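For context on the BF16 cast-cost change: the lookup is keyed on (ISD opcode, destination MVT, source MVT) and returns the first matching entry, and the new BF16Tbl is consulted before the generic ConversionTbl only when the subtarget has +bf16, so native bfcvt/bfcvtn costs shadow the ~8-instruction expansion costs. The code below is a minimal standalone sketch of that two-table, first-match ordering; the plain enums, CostEntry struct, and lookup helper are simplified stand-ins for LLVM's TypeConversionCostTblEntry and ConvertCostTableLookup (not the real API), with costs copied from the tables in the diff.

// Standalone sketch (not LLVM code): models how getCastInstrCost resolves a
// BF16 FP_ROUND cost after this commit. Enums stand in for ISD/MVT values.
#include <cstdio>
#include <optional>
#include <vector>

enum Opcode { FP_ROUND };
enum Ty { f32, f64, bf16, v4f32, v4bf16 };

struct CostEntry {
  Opcode Op;
  Ty Dst;
  Ty Src;
  int Cost;
};

// First-match lookup, mirroring the behaviour assumed of ConvertCostTableLookup.
static std::optional<int> lookup(const std::vector<CostEntry> &Tbl, Opcode Op,
                                 Ty Dst, Ty Src) {
  for (const CostEntry &E : Tbl)
    if (E.Op == Op && E.Dst == Dst && E.Src == Src)
      return E.Cost;
  return std::nullopt;
}

int main() {
  bool HasBF16 = true; // stands in for ST->hasBF16()

  // Native BF16 rounding costs (consulted first when +bf16 is available).
  const std::vector<CostEntry> BF16Tbl = {
      {FP_ROUND, bf16, f32, 1},     // bfcvt
      {FP_ROUND, v4bf16, v4f32, 1}, // bfcvtn
  };
  // Generic table: models the non-+bf16 expansion (~8 instructions).
  const std::vector<CostEntry> ConversionTbl = {
      {FP_ROUND, bf16, f32, 8},
      {FP_ROUND, v4bf16, v4f32, 8},
  };

  auto costOf = [&](Opcode Op, Ty Dst, Ty Src) {
    if (HasBF16)
      if (auto C = lookup(BF16Tbl, Op, Dst, Src))
        return *C;
    return lookup(ConversionTbl, Op, Dst, Src).value_or(-1);
  };

  std::printf("f32 -> bf16 round, +bf16:    cost %d\n",
              costOf(FP_ROUND, bf16, f32)); // 1
  HasBF16 = false;
  std::printf("f32 -> bf16 round, no bf16:  cost %d\n",
              costOf(FP_ROUND, bf16, f32)); // 8
  return 0;
}

Checking the +bf16 table first means a target with native bfcvt never sees the expansion costs, which mirrors the diff's placement of the BF16Tbl lookup ahead of the ConversionTbl fallthrough.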