Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp  1
-rw-r--r--  llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp  2
-rw-r--r--  llvm/lib/Demangle/MicrosoftDemangle.cpp  1
-rw-r--r--  llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h  2
-rw-r--r--  llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h  2
-rw-r--r--  llvm/lib/SandboxIR/Instruction.cpp  27
-rw-r--r--  llvm/lib/Support/DeltaAlgorithm.cpp  1
-rw-r--r--  llvm/lib/Support/MD5.cpp  1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp  2
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp  18
-rw-r--r--  llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp  10
-rw-r--r--  llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h  1
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLowering.cpp  10
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLowering.h  2
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp  2
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonSubtarget.cpp  1
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp  23
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h  3
-rw-r--r--  llvm/lib/Target/Mips/Mips16InstrInfo.cpp  4
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXUtilities.h  1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp  78
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td  83
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp  2
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp  46
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td  3
-rw-r--r--  llvm/lib/TargetParser/RISCVISAInfo.cpp  1
-rw-r--r--  llvm/lib/Transforms/IPO/SampleProfile.cpp  1
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp  11
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp  3
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp  14
31 files changed, 304 insertions(+), 54 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 4f6a19f..d656f10 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3482,7 +3482,7 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
bool IRTranslator::translateAtomicRMW(const User &U,
MachineIRBuilder &MIRBuilder) {
- if (containsBF16Type(U))
+ if (!MF->getTarget().getTargetTriple().isSPIRV() && containsBF16Type(U))
return false;
const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
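
Editor's note: a minimal model of the new gate (the helper name is ours, not the patch's). GlobalISel keeps falling back for bf16 atomicrmw, except when targeting SPIR-V, which can legalize it via the SPV_INTEL_16bit_atomics machinery added in the SPIR-V changes further down.

    #include "llvm/TargetParser/Triple.h"

    // Hedged sketch of the fallback condition in translateAtomicRMW after
    // this change: bf16 payloads still force a SelectionDAG fallback unless
    // the target is SPIR-V, which handles them later in the pipeline.
    static bool shouldFallBackForBF16(const llvm::Triple &TT,
                                      bool ContainsBF16) {
      return !TT.isSPIRV() && ContainsBF16;
    }
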
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bbc1d73..80bbfea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -76,7 +76,6 @@
#include <cstdlib>
#include <limits>
#include <optional>
-#include <set>
#include <string>
#include <utility>
#include <vector>
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
index 91b3dd5..c82edd9 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
@@ -15,8 +15,6 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/TimeProfiler.h"
-#include <map>
-
using namespace llvm;
using namespace llvm::msf;
using namespace llvm::support;
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 0aefe6e..769dbd4 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -21,7 +21,6 @@
#include "llvm/Demangle/StringViewExtras.h"
#include "llvm/Demangle/Utility.h"
-#include <array>
#include <cctype>
#include <cstdio>
#include <optional>
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
index 55442e0..50ba2f8 100644
--- a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
@@ -23,8 +23,6 @@
#define DEBUG_TYPE "jitlink"
-#include <list>
-
namespace llvm {
namespace jitlink {
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
index 343218e..91021e4 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
@@ -21,8 +21,6 @@
#include "EHFrameSupportImpl.h"
#include "JITLinkGeneric.h"
-#include <list>
-
namespace llvm {
namespace jitlink {
diff --git a/llvm/lib/SandboxIR/Instruction.cpp b/llvm/lib/SandboxIR/Instruction.cpp
index 1a81d18..9ae4c98 100644
--- a/llvm/lib/SandboxIR/Instruction.cpp
+++ b/llvm/lib/SandboxIR/Instruction.cpp
@@ -1125,6 +1125,33 @@ void SwitchInst::setDefaultDest(BasicBlock *DefaultCase) {
cast<llvm::SwitchInst>(Val)->setDefaultDest(
cast<llvm::BasicBlock>(DefaultCase->Val));
}
+
+template <typename LLVMCaseItT, typename BlockT, typename ConstT>
+ConstT *
+SwitchInst::CaseHandleImpl<LLVMCaseItT, BlockT, ConstT>::getCaseValue() const {
+ const auto &LLVMCaseHandle = *LLVMCaseIt;
+ auto *LLVMC = Ctx.getValue(LLVMCaseHandle.getCaseValue());
+ return cast<ConstT>(LLVMC);
+}
+
+template <typename LLVMCaseItT, typename BlockT, typename ConstT>
+BlockT *
+SwitchInst::CaseHandleImpl<LLVMCaseItT, BlockT, ConstT>::getCaseSuccessor()
+ const {
+ const auto &LLVMCaseHandle = *LLVMCaseIt;
+ auto *LLVMBB = LLVMCaseHandle.getCaseSuccessor();
+ return cast<BlockT>(Ctx.getValue(LLVMBB));
+}
+
+template class SwitchInst::CaseHandleImpl<llvm::SwitchInst::CaseIt, BasicBlock,
+ ConstantInt>;
+template class SwitchInst::CaseItImpl<llvm::SwitchInst::CaseIt, BasicBlock,
+ ConstantInt>;
+template class SwitchInst::CaseHandleImpl<llvm::SwitchInst::ConstCaseIt,
+ const BasicBlock, const ConstantInt>;
+template class SwitchInst::CaseItImpl<llvm::SwitchInst::ConstCaseIt,
+ const BasicBlock, const ConstantInt>;
+
ConstantInt *SwitchInst::findCaseDest(BasicBlock *BB) {
auto *LLVMC = cast<llvm::SwitchInst>(Val)->findCaseDest(
cast<llvm::BasicBlock>(BB->Val));
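
Editor's note: the member definitions above live out-of-line in Instruction.cpp, so the explicit instantiations are what make the mutable and const case-iterator flavors link from other translation units. A standalone sketch of that pattern (names hypothetical):

    // widget.h -- declaration only; the definition is out-of-line in a .cpp.
    template <typename T> struct Handle { T *get() const; };

    // widget.cpp -- definition plus explicit instantiations. Without them,
    // callers in other TUs would hit undefined symbols, because the compiler
    // never sees the template body at their call sites.
    template <typename T> T *Handle<T>::get() const { return nullptr; }
    template struct Handle<int>;        // emits Handle<int>::get here
    template struct Handle<const int>;  // and the const flavor, likewise
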
diff --git a/llvm/lib/Support/DeltaAlgorithm.cpp b/llvm/lib/Support/DeltaAlgorithm.cpp
index d763cde..e91ee91 100644
--- a/llvm/lib/Support/DeltaAlgorithm.cpp
+++ b/llvm/lib/Support/DeltaAlgorithm.cpp
@@ -8,7 +8,6 @@
#include "llvm/ADT/DeltaAlgorithm.h"
#include <algorithm>
#include <iterator>
-#include <set>
using namespace llvm;
DeltaAlgorithm::~DeltaAlgorithm() = default;
diff --git a/llvm/lib/Support/MD5.cpp b/llvm/lib/Support/MD5.cpp
index 3bff4e1..32e2a2e 100644
--- a/llvm/lib/Support/MD5.cpp
+++ b/llvm/lib/Support/MD5.cpp
@@ -43,7 +43,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Endian.h"
-#include <array>
#include <cstdint>
#include <cstring>
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c8a038f..76a790dc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26050,7 +26050,7 @@ static SDValue performCSELCombine(SDNode *N,
// CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
// CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
if (SDValue Folded = foldCSELofCTTZ(N, DAG))
- return Folded;
+ return Folded;
// CSEL a, b, cc, SUBS(x, y) -> CSEL a, b, swapped(cc), SUBS(y, x)
// if SUB(y, x) already exists and we can produce a swapped predicate for cc.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 14b0f9a..3940246 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5666,6 +5666,9 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
MachineRegisterInfo &MRI) {
LLT DstTy = MRI.getType(Dst);
unsigned DstSize = DstTy.getSizeInBits();
+ assert((DstSize == 64 || DstSize == 128) &&
+ "Unexpected vector constant size");
+
if (CV->isNullValue()) {
if (DstSize == 128) {
auto Mov =
@@ -5735,17 +5738,24 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
// Try to create the new constants with MOVI, and if so generate a fneg
// for it.
if (auto *NewOp = TryMOVIWithBits(NegBits)) {
- Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
+ Register NewDst = MRI.createVirtualRegister(
+ DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
NewOp->getOperand(0).setReg(NewDst);
return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
}
return nullptr;
};
MachineInstr *R;
- if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
- (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
+ if ((R = TryWithFNeg(DefBits, 32,
+ DstSize == 64 ? AArch64::FNEGv2f32
+ : AArch64::FNEGv4f32)) ||
+ (R = TryWithFNeg(DefBits, 64,
+ DstSize == 64 ? AArch64::FNEGDr
+ : AArch64::FNEGv2f64)) ||
(STI.hasFullFP16() &&
- (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
+ (R = TryWithFNeg(DefBits, 16,
+ DstSize == 64 ? AArch64::FNEGv4f16
+ : AArch64::FNEGv8f16))))
return R;
}
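
Editor's note: the patch makes emitConstantVector size-aware; 64-bit vectors now get the D-register FNEG variants and an FPR64 scratch register instead of the previously hardcoded 128-bit forms. A stand-in mapping (strings instead of the real opcode enum, illustrative only):

    #include <string>

    // Which FNEG the selector now picks when a constant is materialized as
    // MOVI(negated bits) + FNEG, keyed by element width and destination size.
    std::string pickFNeg(unsigned EltBits, unsigned DstSizeBits) {
      switch (EltBits) {
      case 16: return DstSizeBits == 64 ? "FNEGv4f16" : "FNEGv8f16";
      case 32: return DstSizeBits == 64 ? "FNEGv2f32" : "FNEGv4f32";
      case 64: return DstSizeBits == 64 ? "FNEGDr"    : "FNEGv2f64";
      }
      return "unexpected element width";
    }
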
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index e67db8e..b119146 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -1402,7 +1402,7 @@ static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
Inst.addOperand(MCOperand::createImm(U | (imm << 4) | Rm));
} else {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
+ return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(U));
}
@@ -1922,7 +1922,7 @@ static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
imm |= fieldFromInstruction(Insn, 24, 1) << 1;
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::createImm(SignExtend32<26>(imm)));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<26>(imm)));
return S;
}
@@ -3703,17 +3703,17 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
Rdm |= fieldFromInstruction(Insn, 7, 1) << 3;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
- return MCDisassembler::Fail;
+ return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createReg(ARM::SP));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
- return MCDisassembler::Fail;
+ return MCDisassembler::Fail;
} else if (Inst.getOpcode() == ARM::tADDspr) {
unsigned Rm = fieldFromInstruction(Insn, 3, 4);
Inst.addOperand(MCOperand::createReg(ARM::SP));
Inst.addOperand(MCOperand::createReg(ARM::SP));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
+ return MCDisassembler::Fail;
}
return S;
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
index 8707b08..f2c00c7 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
@@ -18,7 +18,6 @@
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/MemoryBufferRef.h"
-#include <map>
#include <memory>
#include <string>
#include <vector>
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 526b4de..04a9760 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3948,3 +3948,13 @@ HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
return AtomicExpansionKind::LLSC;
}
+
+bool HexagonTargetLowering::isMaskAndCmp0FoldingBeneficial(
+ const Instruction &AndI) const {
+ // Only sink the 'and' mask into the cmp's block if it masks a single bit,
+ // since the and/cmp/br then folds into a single tstbit instruction.
+ ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+ if (!Mask)
+ return false;
+ return Mask->getValue().isPowerOf2();
+}
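
Editor's note: the hook only fires for single-bit masks because Hexagon's tstbit tests exactly one bit. A self-contained sketch of the power-of-two predicate the hook delegates to APInt::isPowerOf2():

    #include <cassert>
    #include <cstdint>

    // Same predicate as APInt::isPowerOf2() for nonzero values: exactly one
    // bit set means and/cmp/br can become a single tstbit.
    static bool isSingleBitMask(uint64_t Mask) {
      return Mask != 0 && (Mask & (Mask - 1)) == 0;
    }

    int main() {
      assert(isSingleBitMask(0x40)); // and x, 64; cmp eq 0 -> tstbit
      assert(!isSingleBitMask(0x6)); // two bits set: not beneficial to sink
    }
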
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 8d04edb..4ac3e76 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -160,6 +160,8 @@ public:
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
+ bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
/// Return true if an FMA operation is faster than a pair of mul and add
/// instructions. fmuladd intrinsics will be expanded to FMAs when this
/// method returns true (and FMAs are legal), otherwise fmuladd is
diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 6dd83c1..2ee3b9d 100644
--- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -198,7 +198,7 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN,
// Reaching Def to an offset register can't be a phi.
if ((OffsetRegDN.Addr->getFlags() & NodeAttrs::PhiRef) &&
MI.getParent() != UseMI.getParent())
- return false;
+ return false;
const MCInstrDesc &UseMID = UseMI.getDesc();
if ((!UseMID.mayLoad() && !UseMID.mayStore()) ||
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index ce2de75..a3c8a88 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -28,7 +28,6 @@
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
-#include <map>
#include <optional>
using namespace llvm;
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index f548a8d..5107c8d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -111,4 +111,25 @@ bool LoongArchTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
}
}
-// TODO: Implement more hooks to provide TTI machinery for LoongArch.
+LoongArchTTIImpl::TTI::MemCmpExpansionOptions
+LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ TTI::MemCmpExpansionOptions Options;
+
+ if (!ST->hasUAL())
+ return Options;
+
+ Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+ Options.NumLoadsPerBlock = Options.MaxNumLoads;
+ Options.AllowOverlappingLoads = true;
+
+ // TODO: Support for vectors.
+ if (ST->is64Bit()) {
+ Options.LoadSizes = {8, 4, 2, 1};
+ Options.AllowedTailExpansions = {3, 5, 6};
+ } else {
+ Options.LoadSizes = {4, 2, 1};
+ Options.AllowedTailExpansions = {3};
+ }
+
+ return Options;
+}
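
Editor's note: enableMemCmpExpansion lets ExpandMemCmp inline small memcmp calls as wide loads, which is why the hook bails without UAL (the loads are unaligned). A hedged sketch of the code shape memcmp(a, b, 16) == 0 expands to on a 64-bit subtarget with LoadSizes = {8, 4, 2, 1}:

    #include <cstdint>
    #include <cstring>

    // Two 8-byte unaligned loads per buffer, compared as integers; XOR/OR
    // accumulates the mismatch bits for the zero-compare case.
    static bool bytesEqual16(const void *A, const void *B) {
      uint64_t A0, A1, B0, B1;
      std::memcpy(&A0, A, 8);
      std::memcpy(&A1, static_cast<const char *>(A) + 8, 8);
      std::memcpy(&B0, B, 8);
      std::memcpy(&B1, static_cast<const char *>(B) + 8, 8);
      return ((A0 ^ B0) | (A1 ^ B1)) == 0;
    }
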
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index e3f16c7..9b479f9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -55,7 +55,8 @@ public:
bool shouldExpandReduction(const IntrinsicInst *II) const override;
- // TODO: Implement more hooks to provide TTI machinery for LoongArch.
+ TTI::MemCmpExpansionOptions
+ enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index 5d08f56..aa94f54 100644
--- a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -405,9 +405,9 @@ unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm,
}
if (SecondRegSaved)
copyPhysReg(MBB, II, DL, SecondRegSavedTo, SecondRegSaved, true);
+ } else {
+ Available.reset(SpReg);
}
- else
- Available.reset(SpReg);
copyPhysReg(MBB, II, DL, SpReg, Mips::SP, false);
BuildMI(MBB, II, DL, get(Mips::AdduRxRyRz16), Reg)
.addReg(SpReg, RegState::Kill)
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index d92ae8d..21d7768 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -25,7 +25,6 @@
#include "llvm/Support/Alignment.h"
#include "llvm/Support/FormatVariadic.h"
#include <cstdarg>
-#include <set>
#include <string>
namespace llvm {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1977d33..a3ccbd8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -87,6 +87,11 @@ static cl::opt<bool>
"be combined with a shift"),
cl::init(true));
+// TODO: Support more ops
+static const unsigned ZvfbfaVPOps[] = {ISD::VP_FNEG, ISD::VP_FABS,
+ ISD::VP_FCOPYSIGN};
+static const unsigned ZvfbfaOps[] = {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN};
+
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -1208,6 +1213,61 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
};
+ // Set common actions for Zvfbfa; some instructions are supported natively,
+ // so we don't need to promote them.
+ const auto SetZvfbfaActions = [&](MVT VT) {
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+ Custom);
+ setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
+ setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
+ setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
+ Custom);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
+ ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+ ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
+ ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
+ ISD::VECTOR_COMPRESS},
+ VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+
+ setOperationAction(ISD::FCOPYSIGN, VT, Legal);
+ setOperationAction(ZvfbfaVPOps, VT, Custom);
+
+ MVT EltVT = VT.getVectorElementType();
+ if (isTypeLegal(EltVT))
+ setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
+ ISD::EXTRACT_VECTOR_ELT},
+ VT, Custom);
+ else
+ setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
+ EltVT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
+ ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
+ ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
+ ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
+ ISD::VP_SCATTER},
+ VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
+
+ // Expand FP operations that need libcalls.
+ setOperationAction(FloatingPointLibCallOps, VT, Expand);
+
+ // Custom split nxv32bf16 since nxv32f32 is not legal.
+ if (getLMUL(VT) == RISCVVType::LMUL_8) {
+ setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
+ setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
+ } else {
+ MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
+ }
+ };
+
if (Subtarget.hasVInstructionsF16()) {
for (MVT VT : F16VecVTs) {
if (!isTypeLegal(VT))
@@ -1222,7 +1282,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget.hasVInstructionsBF16Minimal()) {
+ if (Subtarget.hasVInstructionsBF16()) {
+ for (MVT VT : BF16VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
+ SetZvfbfaActions(VT);
+ }
+ } else if (Subtarget.hasVInstructionsBF16Minimal()) {
for (MVT VT : BF16VecVTs) {
if (!isTypeLegal(VT))
continue;
@@ -1501,6 +1567,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// available.
setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
}
+ if (Subtarget.hasStdExtZvfbfa()) {
+ setOperationAction(ZvfbfaOps, VT, Custom);
+ setOperationAction(ZvfbfaVPOps, VT, Custom);
+ }
setOperationAction(
{ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
Custom);
@@ -7245,7 +7315,11 @@ static bool isPromotedOpNeedingSplit(SDValue Op,
return (Op.getValueType() == MVT::nxv32f16 &&
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16())) ||
- Op.getValueType() == MVT::nxv32bf16;
+ (Op.getValueType() == MVT::nxv32bf16 &&
+ Subtarget.hasVInstructionsBF16Minimal() &&
+ (!Subtarget.hasVInstructionsBF16() ||
+ (!llvm::is_contained(ZvfbfaOps, Op.getOpcode()) &&
+ !llvm::is_contained(ZvfbfaVPOps, Op.getOpcode()))));
}
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
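
Editor's note: the isPromotedOpNeedingSplit change is the other half of the ZvfbfaOps tables added at the top of the file. nxv32bf16 operations still need the custom split (nxv32f32 is not a legal promote target) unless Zvfbfa lowers them natively. A compact model of the predicate, with the subtarget queries abstracted to bools (the VP opcode table is handled the same way):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/CodeGen/ISDOpcodes.h"
    using namespace llvm;

    static const unsigned ZvfbfaOps[] = {ISD::FNEG, ISD::FABS,
                                         ISD::FCOPYSIGN};

    // Hedged sketch: split nxv32bf16 only when the op still takes the
    // promote-to-f32 path, i.e. Zvfbfa is absent or the op is not native.
    static bool bf16NeedsSplit(bool HasBF16Min, bool HasZvfbfa, unsigned Opc) {
      return HasBF16Min && (!HasZvfbfa || !is_contained(ZvfbfaOps, Opc));
    }
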
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index b9c5b75..ffb2ac0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -701,5 +701,86 @@ let Predicates = [HasStdExtZvfbfa] in {
FRM_DYN,
fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
-}
+
+ foreach vti = AllBF16Vectors in {
+ // 13.12. Vector Floating-Point Sign-Injection Instructions
+ def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
+ (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
+ // Handle fneg with VFSGNJN using the same input for both operands.
+ def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2))),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (fneg vti.RegClass:$rs2)))),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ // 13.12. Vector Floating-Point Sign-Injection Instructions
+ def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+ vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TA_MA)>;
+ // Handle fneg with VFSGNJN using the same input for both operands.
+ def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW #"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+ vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TA_MA)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ vti.RegClass:$passthru,
+ (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ vti.RegClass:$passthru, vti.RegClass:$rs1,
+ vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (riscv_fneg_vl vti.RegClass:$rs2,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ srcvalue,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (SplatFPOp vti.ScalarRegClass:$rs2),
+ vti.RegClass:$passthru,
+ (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ vti.RegClass:$passthru, vti.RegClass:$rs1,
+ vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+ }
+ }
} // Predicates = [HasStdExtZvfbfa]
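
Editor's note: all of these patterns lean on RISC-V sign-injection semantics: VFSGNJN(x, x) is fneg and VFSGNJX(x, x) is fabs, which is why one instruction family covers fneg, fabs, and fcopysign. A scalar model of the three ops (bf16 shown as float for brevity):

    #include <cassert>
    #include <cmath>

    // fsgnj:  magnitude of A, sign of B         -> fcopysign
    // fsgnjn: magnitude of A, negated sign of B -> fneg when B == A
    // fsgnjx: magnitude of A, sign(A)^sign(B)   -> fabs when B == A
    static float fsgnj(float A, float B) { return std::copysign(A, B); }
    static float fsgnjn(float A, float B) { return std::copysign(A, -B); }
    static float fsgnjx(float A, float B) {
      bool Neg = std::signbit(A) != std::signbit(B);
      return std::copysign(A, Neg ? -1.0f : 1.0f);
    }

    int main() {
      assert(fsgnjn(3.0f, 3.0f) == -3.0f);  // fneg
      assert(fsgnjx(-2.0f, -2.0f) == 2.0f); // fabs
      assert(fsgnj(5.0f, -1.0f) == -5.0f);  // fcopysign
    }
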
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index f681b0d..ac09b93 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -29,6 +29,8 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
SPIRV::Extension::Extension::SPV_EXT_shader_atomic_float16_add},
{"SPV_EXT_shader_atomic_float_min_max",
SPIRV::Extension::Extension::SPV_EXT_shader_atomic_float_min_max},
+ {"SPV_INTEL_16bit_atomics",
+ SPIRV::Extension::Extension::SPV_INTEL_16bit_atomics},
{"SPV_EXT_arithmetic_fence",
SPIRV::Extension::Extension::SPV_EXT_arithmetic_fence},
{"SPV_EXT_demote_to_helper_invocation",
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index af76016..fbb127d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1058,6 +1058,13 @@ static void addOpTypeImageReqs(const MachineInstr &MI,
}
}
+static bool isBFloat16Type(const SPIRVType *TypeDef) {
+ return TypeDef && TypeDef->getNumOperands() == 3 &&
+ TypeDef->getOpcode() == SPIRV::OpTypeFloat &&
+ TypeDef->getOperand(1).getImm() == 16 &&
+ TypeDef->getOperand(2).getImm() == SPIRV::FPEncoding::BFloat16KHR;
+}
+
// Add requirements for handling atomic float instructions
#define ATOM_FLT_REQ_EXT_MSG(ExtName) \
"The atomic float instruction requires the following SPIR-V " \
@@ -1081,11 +1088,21 @@ static void AddAtomicFloatRequirements(const MachineInstr &MI,
Reqs.addExtension(SPIRV::Extension::SPV_EXT_shader_atomic_float_add);
switch (BitWidth) {
case 16:
- if (!ST.canUseExtension(
- SPIRV::Extension::SPV_EXT_shader_atomic_float16_add))
- report_fatal_error(ATOM_FLT_REQ_EXT_MSG("16_add"), false);
- Reqs.addExtension(SPIRV::Extension::SPV_EXT_shader_atomic_float16_add);
- Reqs.addCapability(SPIRV::Capability::AtomicFloat16AddEXT);
+ if (isBFloat16Type(TypeDef)) {
+ if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_16bit_atomics))
+ report_fatal_error(
+ "The atomic bfloat16 instruction requires the following SPIR-V "
+ "extension: SPV_INTEL_16bit_atomics",
+ false);
+ Reqs.addExtension(SPIRV::Extension::SPV_INTEL_16bit_atomics);
+ Reqs.addCapability(SPIRV::Capability::AtomicBFloat16AddINTEL);
+ } else {
+ if (!ST.canUseExtension(
+ SPIRV::Extension::SPV_EXT_shader_atomic_float16_add))
+ report_fatal_error(ATOM_FLT_REQ_EXT_MSG("16_add"), false);
+ Reqs.addExtension(SPIRV::Extension::SPV_EXT_shader_atomic_float16_add);
+ Reqs.addCapability(SPIRV::Capability::AtomicFloat16AddEXT);
+ }
break;
case 32:
Reqs.addCapability(SPIRV::Capability::AtomicFloat32AddEXT);
@@ -1104,7 +1121,17 @@ static void AddAtomicFloatRequirements(const MachineInstr &MI,
Reqs.addExtension(SPIRV::Extension::SPV_EXT_shader_atomic_float_min_max);
switch (BitWidth) {
case 16:
- Reqs.addCapability(SPIRV::Capability::AtomicFloat16MinMaxEXT);
+ if (isBFloat16Type(TypeDef)) {
+ if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_16bit_atomics))
+ report_fatal_error(
+ "The atomic bfloat16 instruction requires the following SPIR-V "
+ "extension: SPV_INTEL_16bit_atomics",
+ false);
+ Reqs.addExtension(SPIRV::Extension::SPV_INTEL_16bit_atomics);
+ Reqs.addCapability(SPIRV::Capability::AtomicBFloat16MinMaxINTEL);
+ } else {
+ Reqs.addCapability(SPIRV::Capability::AtomicFloat16MinMaxEXT);
+ }
break;
case 32:
Reqs.addCapability(SPIRV::Capability::AtomicFloat32MinMaxEXT);
@@ -1328,13 +1355,6 @@ void addPrintfRequirements(const MachineInstr &MI,
}
}
-static bool isBFloat16Type(const SPIRVType *TypeDef) {
- return TypeDef && TypeDef->getNumOperands() == 3 &&
- TypeDef->getOpcode() == SPIRV::OpTypeFloat &&
- TypeDef->getOperand(1).getImm() == 16 &&
- TypeDef->getOperand(2).getImm() == SPIRV::FPEncoding::BFloat16KHR;
-}
-
void addInstrRequirements(const MachineInstr &MI,
SPIRV::ModuleAnalysisInfo &MAI,
const SPIRVSubtarget &ST) {
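
Editor's note: isBFloat16Type is hoisted above AddAtomicFloatRequirements so the 16-bit case can distinguish bfloat16 from half. The layout it checks corresponds to the SPIR-V form `%t = OpTypeFloat 16 BFloat16KHR` (result id, width, optional encoding). A toy model of that check; the opcode and encoding constants below are our stand-ins, not values taken from the patch:

    #include <cstdint>

    // Toy stand-in for a MachineInstr; the real code reads MI operands.
    struct TypeInst {
      unsigned Opcode;      // SPIRV::OpTypeFloat in the real check
      unsigned NumOperands; // 3 only when the optional FP encoding is present
      int64_t Width, Encoding;
    };

    constexpr unsigned OpTypeFloatOpc = 22; // assumed SPIR-V opcode value
    constexpr int64_t BFloat16KHREnc = 0;   // assumed FPEncoding value

    static bool isBF16(const TypeInst &T) {
      return T.Opcode == OpTypeFloatOpc && T.NumOperands == 3 &&
             T.Width == 16 && T.Encoding == BFloat16KHREnc;
    }
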
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 65a8885..f02a587 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -389,6 +389,7 @@ defm SPV_INTEL_predicated_io : ExtensionOperand<127, [EnvOpenCL]>;
defm SPV_KHR_maximal_reconvergence : ExtensionOperand<128, [EnvVulkan]>;
defm SPV_INTEL_bfloat16_arithmetic
: ExtensionOperand<129, [EnvVulkan, EnvOpenCL]>;
+defm SPV_INTEL_16bit_atomics : ExtensionOperand<130, [EnvVulkan, EnvOpenCL]>;
//===----------------------------------------------------------------------===//
// Multiclass used to define Capabilities enum values and at the same time
@@ -566,9 +567,11 @@ defm FloatControls2
defm AtomicFloat32AddEXT : CapabilityOperand<6033, 0, 0, [SPV_EXT_shader_atomic_float_add], []>;
defm AtomicFloat64AddEXT : CapabilityOperand<6034, 0, 0, [SPV_EXT_shader_atomic_float_add], []>;
defm AtomicFloat16AddEXT : CapabilityOperand<6095, 0, 0, [SPV_EXT_shader_atomic_float16_add], []>;
+defm AtomicBFloat16AddINTEL : CapabilityOperand<6255, 0, 0, [SPV_INTEL_16bit_atomics], []>;
defm AtomicFloat16MinMaxEXT : CapabilityOperand<5616, 0, 0, [SPV_EXT_shader_atomic_float_min_max], []>;
defm AtomicFloat32MinMaxEXT : CapabilityOperand<5612, 0, 0, [SPV_EXT_shader_atomic_float_min_max], []>;
defm AtomicFloat64MinMaxEXT : CapabilityOperand<5613, 0, 0, [SPV_EXT_shader_atomic_float_min_max], []>;
+defm AtomicBFloat16MinMaxINTEL : CapabilityOperand<6256, 0, 0, [SPV_INTEL_16bit_atomics], []>;
defm VariableLengthArrayINTEL : CapabilityOperand<5817, 0, 0, [SPV_INTEL_variable_length_array], []>;
defm GroupUniformArithmeticKHR : CapabilityOperand<6400, 0, 0, [SPV_KHR_uniform_group_instructions], []>;
defm USMStorageClassesINTEL : CapabilityOperand<5935, 0, 0, [SPV_INTEL_usm_storage_classes], [Kernel]>;
diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index f08a0c0..94ae64c 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -14,7 +14,6 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
-#include <array>
#include <atomic>
#include <optional>
#include <string>
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index bd74388..8e76b79 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -83,7 +83,6 @@
#include <cstdint>
#include <functional>
#include <limits>
-#include <map>
#include <memory>
#include <queue>
#include <string>
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 27ec6c6..5bc9c28 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -5627,8 +5627,15 @@ bool InstCombinerImpl::run() {
for (Use &U : I->uses()) {
User *User = U.getUser();
- if (User->isDroppable())
- continue;
+ if (User->isDroppable()) {
+ // Do not sink if there are dereferenceable assumes that would be
+ // removed.
+ auto II = dyn_cast<IntrinsicInst>(User);
+ if (II->getIntrinsicID() != Intrinsic::assume ||
+ !II->getOperandBundle("dereferenceable"))
+ continue;
+ }
+
if (NumUsers > MaxSinkNumUsers)
return std::nullopt;
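
Editor's note: the new check relies on isDroppable() being true only for assume-style uses, so the dyn_cast in the hunk cannot fail in practice. The interesting case is an assume carrying a "dereferenceable" operand bundle, whose facts would be lost if the instruction were sunk past it. A hedged restatement using the same CallBase API (the helper name is ours):

    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    // True for droppable users that must still block sinking: an assume
    // with a "dereferenceable" bundle pinning facts about the value.
    static bool isLoadBearingDroppableUse(const User *U) {
      const auto *II = dyn_cast<IntrinsicInst>(U);
      return II && II->getIntrinsicID() == Intrinsic::assume &&
             II->getOperandBundle("dereferenceable").has_value();
    }
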
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 666033b..45b5570 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8157,9 +8157,10 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
return new VPWidenSelectRecipe(*cast<SelectInst>(Instr), R->operands());
if (Instruction::isCast(VPI->getOpcode())) {
+ auto *CastR = cast<VPInstructionWithType>(R);
auto *CI = cast<CastInst>(Instr);
return new VPWidenCastRecipe(CI->getOpcode(), VPI->getOperand(0),
- CI->getType(), *CI);
+ CastR->getResultType(), *CI);
}
return tryToWiden(VPI);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f405c40..663e31a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/MDBuilder.h"
#define DEBUG_TYPE "vplan"
@@ -233,10 +234,15 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
for (Value *Op : Inst->operands())
VPOperands.push_back(getOrCreateVPOperand(Op));
- // Build VPInstruction for any arbitrary Instruction without specific
- // representation in VPlan.
- NewR = cast<VPInstruction>(
- VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
+ if (auto *CI = dyn_cast<CastInst>(Inst)) {
+ NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
+ CI->getType(), CI->getDebugLoc());
+ NewR->setUnderlyingValue(CI);
+ } else {
+ // Build VPInstruction for any arbitrary Instruction without specific
+ // representation in VPlan.
+ NewR = VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst);
+ }
}
IRDef2VPValue[Inst] = NewR;