diff options
author | Momchil Velikov <momchil.velikov@arm.com> | 2024-06-17 17:57:07 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-17 17:57:07 +0100 |
commit | d1a4f0c9fb559eb4c2fb56112e56343bcd333edc (patch) | |
tree | 9e4fabe1dfd9edfc0c4a2a9dea8cbf59e15b5120 /llvm/lib | |
parent | 5914a5671a1596f68189c8408f8d877e6b6373bf (diff) | |
download | llvm-d1a4f0c9fb559eb4c2fb56112e56343bcd333edc.zip llvm-d1a4f0c9fb559eb4c2fb56112e56343bcd333edc.tar.gz llvm-d1a4f0c9fb559eb4c2fb56112e56343bcd333edc.tar.bz2 |
[AArch64] Lower extending sitofp using tbl (#92528)
In a similar manner as in https://reviews.llvm.org/D133494,
use `TBL` to place bytes in the *upper* part of `i32` elements
and then convert to float using fixed-point `scvtf`, i.e.
`scvtf Vd.4s, Vn.4s, #24`
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 37 |
2 files changed, 37 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index bb2c76d..aee9935 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -8331,7 +8331,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) { if (OptimizeNoopCopyExpression(CI, *TLI, *DL)) return true; - if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) || isa<TruncInst>(I)) && + if ((isa<UIToFPInst>(I) || isa<SIToFPInst>(I) || isa<FPToUIInst>(I) || + isa<TruncInst>(I)) && TLI->optimizeExtendOrTruncateConversion( I, LI->getLoopFor(I->getParent()), *TTI)) return true; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c5c3ef0..35871cc 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15869,6 +15869,24 @@ static Value *createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op, return Result; } +static Value *createTblShuffleForSExt(IRBuilderBase &Builder, Value *Op, + FixedVectorType *DstTy, + bool IsLittleEndian) { + auto *SrcTy = cast<FixedVectorType>(Op->getType()); + auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth(); + auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth(); + + SmallVector<int> Mask; + if (!createTblShuffleMask(SrcWidth, DstWidth, SrcTy->getNumElements(), + !IsLittleEndian, Mask)) + return nullptr; + + auto *FirstEltZero = Builder.CreateInsertElement( + PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0)); + + return Builder.CreateShuffleVector(Op, FirstEltZero, Mask); +} + static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian) { IRBuilder<> Builder(TI); SmallVector<Value *> Parts; @@ -16049,14 +16067,29 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion( Value *ZExt = createTblShuffleForZExt( Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy), FixedVectorType::getInteger(DstTy), 
Subtarget->isLittleEndian()); - if (!ZExt) - return false; + assert(ZExt && "Cannot fail for the i8 to float conversion"); auto *UI = Builder.CreateUIToFP(ZExt, DstTy); I->replaceAllUsesWith(UI); I->eraseFromParent(); return true; } + auto *SIToFP = dyn_cast<SIToFPInst>(I); + if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) && + DstTy->getElementType()->isFloatTy()) { + IRBuilder<> Builder(I); + auto *Shuffle = createTblShuffleForSExt(Builder, I->getOperand(0), + FixedVectorType::getInteger(DstTy), + Subtarget->isLittleEndian()); + assert(Shuffle && "Cannot fail for the i8 to float conversion"); + auto *Cast = Builder.CreateBitCast(Shuffle, VectorType::getInteger(DstTy)); + auto *AShr = Builder.CreateAShr(Cast, 24, "", true); + auto *SI = Builder.CreateSIToFP(AShr, DstTy); + I->replaceAllUsesWith(SI); + I->eraseFromParent(); + return true; + } + // Convert 'fptoui <(8|16) x float> to <(8|16) x i8>' to a wide fptoui // followed by a truncate lowered to using tbl.4. auto *FPToUI = dyn_cast<FPToUIInst>(I); |